From 5fdc6d4fa48d25309d1a774b04e79debbf797e75 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 14 Oct 2023 09:05:05 +0200 Subject: [PATCH 01/50] integration of machine based reading order detection --- qurator/eynollah/eynollah.py | 222 +++++++++++++++++++++++++++--- qurator/eynollah/utils/contour.py | 11 +- 2 files changed, 209 insertions(+), 24 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 4b1b5e9..b83db98 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -78,6 +78,7 @@ from .utils.xml import order_and_id_of_texts from .plot import EynollahPlotter from .writer import EynollahXmlWriter +MIN_AREA_REGION = 0.0005 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -225,6 +226,7 @@ class Eynollah: self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" + self.model_reading_order_machine_dir = dir_models + "/model_6_reading_order_machine_based" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -246,6 +248,7 @@ class Eynollah: self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) + self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) self.ls_imgs = os.listdir(self.dir_in) @@ -264,6 +267,7 @@ class Eynollah: self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement) + self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) self.ls_imgs = os.listdir(self.dir_in) @@ -1647,9 +1651,39 @@ class Eynollah: mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + + #for jv in range(1): + #print(jv, hir_lines_xml[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + + + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + #sys.exit() + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -1785,7 +1819,7 @@ class Eynollah: polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, 
polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only, 1, 0.00001) polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only, 1, 0.00001) @@ -1853,7 +1887,7 @@ class Eynollah: mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - polygons_lines_xml = textline_con_fil = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) @@ -2821,13 +2855,157 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model + + def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + + #print(text_regions_p.shape) + y_len = text_regions_p.shape[0] + x_len = text_regions_p.shape[1] + + img_poly = np.zeros((y_len,x_len), dtype='uint8') + + unique_pix = np.unique(text_regions_p) + #print(unique_pix, 'unique_pix') + + #for pix in unique_pix: + #print(pix) + #plt.imshow((text_regions_p[:,:]==pix)*1 ) + #plt.show() + + img_poly[text_regions_p[:,:]==1] = 1 + img_poly[text_regions_p[:,:]==2] = 2 + img_poly[text_regions_p[:,:]==3] = 4 + img_poly[text_regions_p[:,:]==6] = 5 + + #plt.imshow(text_regions_p) + #plt.show() + + + #plt.imshow(img_poly) + #plt.show() + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + + img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + + for j in range(len(cy_main)): + #print(j, int(y_max_main[j]), x_min_main[j], x_max_main[j] ) + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + #plt.imshow(img_header_and_sep[:,:]) + #plt.show() + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + #id_all_text = id_paragraph + id_header + + #texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ] + #texts_corr_order_index_int = [int(x) for x in texts_corr_order_index] + + #co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area) + + labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + for i in range(len(co_text_all)): + img_label = np.zeros((y_len,x_len,3),dtype='uint8') + img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) + labels_con[:,:,i] = img_label[:,:,0] + + + img3= np.copy(img_poly) + + labels_con = resize_image(labels_con, height1, width1) + + img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + + img3= resize_image (img3, height3, width3) + + img3 = img3.astype(np.uint16) + + + #plt.imshow(img3) + #plt.show() + + order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 + + for i in 
range(labels_con.shape[2]): + for j in range(labels_con.shape[2]): + if j>i: + img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) + img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) + #img1 = img1.astype(np.uint16) + #img2 = img2.astype(np.uint16) + + img2[:,:,0][img3[:,:]==5] = 2 + img2[:,:,0][img_header_and_sep[:,:]==1] = 3 + + + + img1[:,:,0][img3[:,:]==5] = 2 + img1[:,:,0][img_header_and_sep[:,:]==1] = 3 + + + #plt.imshow(labels_con[:,:,i]) + #plt.show() + + #plt.imshow(img2[:,:,0]) + #plt.show() + + + #plt.imshow(img1[:,:,0]) + #plt.show() + + #sys.exit() + input_1= np.zeros( (height1, width1,3)) + + input_1[:,:,0] = img1[:,:,0]/3. + input_1[:,:,2] = img2[:,:,0]/3. + input_1[:,:,1] = img3[:,:]/5. + + #y_pr=model.predict([img1.reshape(1,height1,width1,3) , img2.reshape(1,height2,width2,3),img3.reshape(1,height3,width3,3) ], verbose=2) + y_pr=model_ro_machine.predict(input_1.reshape(1,height1,width1,3) , verbose=0) + #print(y_pr) + + if y_pr>=0.5: + order_class = 1 + else: + order_class = 0 + + order_matrix[i,j] = y_pr#order_class + order_matrix[j,i] = 1-y_pr#int( 1 - order_class) + + + sum_mat = np.sum(order_matrix, axis=1) + index_sort = np.argsort(sum_mat) + index_sort = index_sort[::-1] + + print(index_sort) + REGION_ID_TEMPLATE = 'region_%04d' + order_of_texts = [] + id_of_texts = [] + for order, id_text in enumerate(index_sort): + order_of_texts.append(id_text) + id_of_texts.append( REGION_ID_TEMPLATE % order ) + + + return order_of_texts, id_of_texts def run(self): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - + + self.reading_order_machine_based = True#True t0_tot = time.time() @@ -2896,7 +3074,7 @@ class Eynollah: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - min_con_area = 0.000005 + ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) @@ -2906,8 +3084,8 @@ class Eynollah: areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) #self.logger.info('areas_cnt_text %s', areas_cnt_text) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -2983,8 +3161,8 @@ class Eynollah: areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > min_con_area] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > min_con_area] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] 
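
Note: the hunk above swaps the local min_con_area threshold for the module-level MIN_AREA_REGION constant (0.0005) declared at the top of this patch, so tiny text contours are dropped relative to page size. A minimal illustrative sketch of that relative-area filter (not part of the patch; the function name is hypothetical):

    import cv2
    import numpy as np

    def filter_small_text_regions(contours, page_shape, min_area_region=0.0005):
        # Normalize each contour area by the page area so the threshold
        # is resolution independent, as in the hunk above.
        page_area = float(page_shape[0] * page_shape[1])
        return [c for c in contours
                if cv2.contourArea(c) / page_area > min_area_region]
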
index_con_parents = np.argsort(areas_cnt_text_parent) contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) @@ -3086,21 +3264,33 @@ class Eynollah: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() if self.full_layout: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts + + print(id_of_texts_tot,'id_of_texts_tot') + print(order_text_new,'order_text_new') + else: contours_only_text_parent_h = None - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py index bac8235..53b39b5 100644 --- 
a/qurator/eynollah/utils/contour.py +++ b/qurator/eynollah/utils/contour.py @@ -44,8 +44,8 @@ def get_text_region_boxes_by_given_contours(contours): def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - jv = 0 - for c in contours: + + for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -53,14 +53,12 @@ def filter_contours_area_of_image(image, contours, hierarchy, max_area, min_area area = polygon.area if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hierarchy[0][jv][3] == -1: # and hierarchy[0][jv][3]==-1 : found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint)) - jv += 1 return found_polygons_early def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, min_area): found_polygons_early = list() - jv = 0 - for c in contours: + for jv,c in enumerate(contours): if len(c) < 3: # A polygon cannot have less than 3 points continue @@ -73,7 +71,6 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hierarchy[0][jv][3]==-1 : # print(c[0][0][1]) found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) - jv += 1 return found_polygons_early def find_new_features_of_contours(contours_main): @@ -234,8 +231,6 @@ def get_textregion_contours_in_org_image_multi2(cnts, img, slope_first): with Pool(cpu_count()) as p: cnts_org = p.starmap(loop_contour_image, [(index_l,cnts, img,slope_first) for index_l in range(len(cnts))]) - print(len(cnts_org),'lendiha') - return cnts_org def get_textregion_contours_in_org_image(cnts, img, slope_first): From 49c93149a49b103b6434fee79ef28517fa4b13f9 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 10:01:28 +0200 Subject: [PATCH 02/50] machine based reading order inference with a variable batch size --- qurator/eynollah/eynollah.py | 96 +++++++++++++++--------------------- 1 file changed, 41 insertions(+), 55 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b83db98..35992c9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2857,32 +2857,20 @@ class Eynollah: return model def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): - - #print(text_regions_p.shape) y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] img_poly = np.zeros((y_len,x_len), dtype='uint8') unique_pix = np.unique(text_regions_p) - #print(unique_pix, 'unique_pix') - - #for pix in unique_pix: - #print(pix) - #plt.imshow((text_regions_p[:,:]==pix)*1 ) - #plt.show() + img_poly[text_regions_p[:,:]==1] = 1 img_poly[text_regions_p[:,:]==2] = 2 img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - #plt.imshow(text_regions_p) - #plt.show() - - - #plt.imshow(img_poly) - #plt.show() + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) height1 =672#448 @@ -2900,19 +2888,11 @@ class Eynollah: img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') for j in range(len(cy_main)): - #print(j, int(y_max_main[j]), x_min_main[j], x_max_main[j] ) img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - #plt.imshow(img_header_and_sep[:,:]) - #plt.show() 
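
Note: the loop above draws a 12-pixel strip under every heading so that the reading-order model receives headings and separators as explicit ordering cues. A minimal, self-contained sketch of that auxiliary mask (not part of the patch; the function name and box format are assumptions):

    import numpy as np

    def header_separator_mask(heading_boxes, page_shape, strip_height=12):
        # heading_boxes: iterable of (x_min, x_max, y_min, y_max) per heading
        mask = np.zeros(page_shape[:2], dtype='uint8')
        for x_min, x_max, y_min, y_max in heading_boxes:
            mask[int(y_max):int(y_max) + strip_height, int(x_min):int(x_max)] = 1
        return mask
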
co_text_all = contours_only_text_parent + contours_only_text_parent_h - #id_all_text = id_paragraph + id_header - - #texts_corr_order_index = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ] - #texts_corr_order_index_int = [int(x) for x in texts_corr_order_index] - #co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area) labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') for i in range(len(co_text_all)): @@ -2932,63 +2912,69 @@ class Eynollah: img3 = img3.astype(np.uint16) - #plt.imshow(img3) - #plt.show() - order_matrix = np.zeros((labels_con.shape[2], labels_con.shape[2]))-1 + inference_bs = 6 + tot_counter = 1 + batch_counter = 0 + i_indexer = [] + j_indexer =[] + + input_1= np.zeros( (inference_bs, height1, width1,3)) + tot_iteration = int( ( labels_con.shape[2]*(labels_con.shape[2]-1) )/2. ) + full_bs_ite= tot_iteration//inference_bs + last_bs = tot_iteration % inference_bs + + #print(labels_con.shape[2],"number of regions for reading order") for i in range(labels_con.shape[2]): for j in range(labels_con.shape[2]): if j>i: img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) - #img1 = img1.astype(np.uint16) - #img2 = img2.astype(np.uint16) img2[:,:,0][img3[:,:]==5] = 2 img2[:,:,0][img_header_and_sep[:,:]==1] = 3 - - img1[:,:,0][img3[:,:]==5] = 2 img1[:,:,0][img_header_and_sep[:,:]==1] = 3 - #plt.imshow(labels_con[:,:,i]) - #plt.show() - - #plt.imshow(img2[:,:,0]) - #plt.show() - + i_indexer.append(i) + j_indexer.append(j) - #plt.imshow(img1[:,:,0]) - #plt.show() + input_1[batch_counter,:,:,0] = img1[:,:,0]/3. + input_1[batch_counter,:,:,2] = img2[:,:,0]/3. + input_1[batch_counter,:,:,1] = img3[:,:]/5. - #sys.exit() - input_1= np.zeros( (height1, width1,3)) + batch_counter = batch_counter+1 - input_1[:,:,0] = img1[:,:,0]/3. - input_1[:,:,2] = img2[:,:,0]/3. - input_1[:,:,1] = img3[:,:]/5. 
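
Note: the batched loop above encodes each region pair together with the page layout as one three-channel input: channel 0 holds the first region, channel 1 the layout map, channel 2 the second region, each divided by its maximum label value (3, 5 and 3). A minimal sketch of that encoding for a single pair (not part of the patch; the function name is hypothetical):

    import numpy as np

    def encode_region_pair(mask_i, mask_j, layout):
        # mask_i / mask_j: uint8 masks with labels in {0..3}
        # layout: uint8 label image with labels in {0..5}
        pair = np.zeros(mask_i.shape + (3,), dtype=np.float32)
        pair[:, :, 0] = mask_i / 3.
        pair[:, :, 1] = layout / 5.
        pair[:, :, 2] = mask_j / 3.
        return pair
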
- - #y_pr=model.predict([img1.reshape(1,height1,width1,3) , img2.reshape(1,height2,width2,3),img3.reshape(1,height3,width3,3) ], verbose=2) - y_pr=model_ro_machine.predict(input_1.reshape(1,height1,width1,3) , verbose=0) - #print(y_pr) + if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + y_pr=model_ro_machine.predict(input_1 , verbose=0) - if y_pr>=0.5: - order_class = 1 - else: - order_class = 0 + if batch_counter==inference_bs: + iteration_batches = inference_bs + else: + iteration_batches = last_bs + for jb in range(iteration_batches): + if y_pr[jb][0]>=0.5: + order_class = 1 + else: + order_class = 0 + + order_matrix[i_indexer[jb],j_indexer[jb]] = y_pr[jb][0]#order_class + order_matrix[j_indexer[jb],i_indexer[jb]] = 1-y_pr[jb][0]#int( 1 - order_class) + + batch_counter = 0 - order_matrix[i,j] = y_pr#order_class - order_matrix[j,i] = 1-y_pr#int( 1 - order_class) + i_indexer = [] + j_indexer = [] + tot_counter = tot_counter+1 sum_mat = np.sum(order_matrix, axis=1) index_sort = np.argsort(sum_mat) index_sort = index_sort[::-1] - print(index_sort) REGION_ID_TEMPLATE = 'region_%04d' order_of_texts = [] id_of_texts = [] @@ -3272,13 +3258,12 @@ class Eynollah: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts - print(id_of_texts_tot,'id_of_texts_tot') - print(order_text_new,'order_text_new') else: contours_only_text_parent_h = None @@ -3291,6 +3276,7 @@ class Eynollah: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts From 59c0d90e5af7ed3f1d3d8d7a78ecdcc17eb2fb59 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 10:17:46 +0200 Subject: [PATCH 03/50] machine based reading order inference & optimized algorithm --- qurator/eynollah/eynollah.py | 153 ++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 3 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 35992c9..63e71cb 100644 --- 
a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2855,7 +2855,6 @@ class Eynollah: model = load_model(model_file , compile=False,custom_objects = {"PatchEncoder": PatchEncoder, "Patches": Patches}) return model - def do_order_of_regions_with_machine(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): y_len = text_regions_p.shape[0] x_len = text_regions_p.shape[1] @@ -2983,6 +2982,154 @@ class Eynollah: id_of_texts.append( REGION_ID_TEMPLATE % order ) + return order_of_texts, id_of_texts + + def update_list_and_return_first_biger_than_one_length(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): + list_inp.pop(index_element_to_be_updated) + if len(pr_list)>0: + list_inp.insert(index_element_to_be_updated, pr_list) + else: + index_element_to_be_updated = index_element_to_be_updated -1 + + list_inp.insert(index_element_to_be_updated+1, [innner_index_pr_pos]) + if len(pos_list)>0: + list_inp.insert(index_element_to_be_updated+2, pos_list) + + len_all_elements = [len(i) for i in list_inp] + list_len_bigger_1 = np.where(np.array(len_all_elements)>1) + list_len_bigger_1 = list_len_bigger_1[0] + + if len(list_len_bigger_1)>0: + early_list_bigger_than_one = list_len_bigger_1[0] + else: + early_list_bigger_than_one = -20 + return list_inp, early_list_bigger_than_one + def do_order_of_regions_with_machine_optimized_algorithm(self,contours_only_text_parent, contours_only_text_parent_h, text_regions_p): + y_len = text_regions_p.shape[0] + x_len = text_regions_p.shape[1] + + img_poly = np.zeros((y_len,x_len), dtype='uint8') + + unique_pix = np.unique(text_regions_p) + + + img_poly[text_regions_p[:,:]==1] = 1 + img_poly[text_regions_p[:,:]==2] = 2 + img_poly[text_regions_p[:,:]==3] = 4 + img_poly[text_regions_p[:,:]==6] = 5 + + + model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + + height1 =672#448 + width1 = 448#224 + + height2 =672#448 + width2= 448#224 + + height3 =672#448 + width3 = 448#224 + + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + + img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + + + labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') + for i in range(len(co_text_all)): + img_label = np.zeros((y_len,x_len,3),dtype='uint8') + img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1)) + labels_con[:,:,i] = img_label[:,:,0] + + + img3= np.copy(img_poly) + + labels_con = resize_image(labels_con, height1, width1) + + img_header_and_sep = resize_image(img_header_and_sep, height1, width1) + + img3= resize_image (img3, height3, width3) + + img3 = img3.astype(np.uint16) + + inference_bs = 4 + input_1= np.zeros( (inference_bs, height1, width1,3)) + starting_list_of_regions = [] + starting_list_of_regions.append( list(range(labels_con.shape[2])) ) + index_update = 0 + index_selected = starting_list_of_regions[0] + #print(labels_con.shape[2],"number of regions for reading order") + while index_update>=0: + ij_list = starting_list_of_regions[index_update] + i = ij_list[0] + ij_list.pop(0) + + pr_list = [] + post_list = [] + + batch_counter = 0 + tot_counter = 1 + + tot_iteration = len(ij_list) + full_bs_ite= tot_iteration//inference_bs + last_bs = 
tot_iteration % inference_bs + + jbatch_indexer =[] + for j in ij_list: + img1= np.repeat(labels_con[:,:,i][:, :, np.newaxis], 3, axis=2) + img2 = np.repeat(labels_con[:,:,j][:, :, np.newaxis], 3, axis=2) + + img2[:,:,0][img3[:,:]==5] = 2 + img2[:,:,0][img_header_and_sep[:,:]==1] = 3 + + img1[:,:,0][img3[:,:]==5] = 2 + img1[:,:,0][img_header_and_sep[:,:]==1] = 3 + + jbatch_indexer.append(j) + + input_1[batch_counter,:,:,0] = img1[:,:,0]/3. + input_1[batch_counter,:,:,2] = img2[:,:,0]/3. + input_1[batch_counter,:,:,1] = img3[:,:]/5. + + batch_counter = batch_counter+1 + + if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): + y_pr=model_ro_machine.predict(input_1 , verbose=0) + + if batch_counter==inference_bs: + iteration_batches = inference_bs + else: + iteration_batches = last_bs + for jb in range(iteration_batches): + if y_pr[jb][0]>=0.5: + post_list.append(jbatch_indexer[jb]) + else: + pr_list.append(jbatch_indexer[jb]) + + batch_counter = 0 + jbatch_indexer = [] + + tot_counter = tot_counter+1 + + starting_list_of_regions, index_update = self.update_list_and_return_first_biger_than_one_length(index_update, i, pr_list, post_list,starting_list_of_regions) + + index_sort = [i[0] for i in starting_list_of_regions ] + + REGION_ID_TEMPLATE = 'region_%04d' + order_of_texts = [] + id_of_texts = [] + for order, id_text in enumerate(index_sort): + order_of_texts.append(id_text) + id_of_texts.append( REGION_ID_TEMPLATE % order ) + + return order_of_texts, id_of_texts def run(self): @@ -3252,7 +3399,7 @@ class Eynollah: if self.full_layout: if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) @@ -3268,7 +3415,7 @@ class Eynollah: else: contours_only_text_parent_h = None if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) From 941d87328a45ad6df5df27c0a84a4b695de65c67 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Oct 2023 11:19:30 +0200 Subject: [PATCH 04/50] machine based reading order & works for not full layout case --- qurator/eynollah/eynollah.py | 84 ++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 63e71cb..c008476 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2881,16 +2881,17 @@ class Eynollah: height3 =672#448 width3 = 448#224 - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - img_header_and_sep = np.zeros((y_len,x_len), 
dtype='uint8') - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h + if contours_only_text_parent_h: + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') @@ -2984,7 +2985,7 @@ class Eynollah: return order_of_texts, id_of_texts - def update_list_and_return_first_biger_than_one_length(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): + def update_list_and_return_first_with_length_bigger_than_one(self,index_element_to_be_updated, innner_index_pr_pos, pr_list, pos_list,list_inp): list_inp.pop(index_element_to_be_updated) if len(pr_list)>0: list_inp.insert(index_element_to_be_updated, pr_list) @@ -3030,16 +3031,17 @@ class Eynollah: height3 =672#448 width3 = 448#224 - _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) - - img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8') - - for j in range(len(cy_main)): - img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 - - co_text_all = contours_only_text_parent + contours_only_text_parent_h + if contours_only_text_parent_h: + _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(contours_only_text_parent_h) + + for j in range(len(cy_main)): + img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1 + + co_text_all = contours_only_text_parent + contours_only_text_parent_h + else: + co_text_all = contours_only_text_parent labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8') @@ -3118,7 +3120,7 @@ class Eynollah: tot_counter = tot_counter+1 - starting_list_of_regions, index_update = self.update_list_and_return_first_biger_than_one_length(index_update, i, pr_list, post_list,starting_list_of_regions) + starting_list_of_regions, index_update = self.update_list_and_return_first_with_length_bigger_than_one(index_update, i, pr_list, post_list,starting_list_of_regions) index_sort = [i[0] for i in starting_list_of_regions ] @@ -3138,7 +3140,7 @@ class Eynollah: """ self.logger.debug("enter run") - self.reading_order_machine_based = True#True + self.reading_order_machine_based = True#False#True#True t0_tot = time.time() @@ -3359,32 +3361,32 @@ class Eynollah: all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) pixel_lines = 6 + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, 
matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - if num_col_classifier >= 3: + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) - + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - - - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) - else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) #print(boxes_d,'boxes_d') #img_once = 
np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) From eac18c553d6829cbb6c3c0d6ca1572977a2b3243 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 13 Dec 2023 01:44:51 +0100 Subject: [PATCH 05/50] machine based reading order as an argument --- qurator/eynollah/cli.py | 8 ++++++++ qurator/eynollah/eynollah.py | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a2a2ad0..a422df9 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -133,6 +133,12 @@ from qurator.eynollah.eynollah import Eynollah is_flag=True, help="if this parameter set to true, this tool would ignore page extraction", ) +@click.option( + "--reading_order_machine_based/--heuristic_reading_order", + "-romb/-hro", + is_flag=True, + help="if this parameter set to true, this tool would apply machine based reading order detection", +) @click.option( "--log-level", "-l", @@ -160,6 +166,7 @@ def main( allow_scaling, headers_off, light_version, + reading_order_machine_based, ignore_page_extraction, log_level ): @@ -197,6 +204,7 @@ def main( headers_off=headers_off, light_version=light_version, ignore_page_extraction=ignore_page_extraction, + reading_order_machine_based=reading_order_machine_based, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c008476..5e06734 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -165,6 +165,7 @@ class Eynollah: headers_off=False, light_version=False, ignore_page_extraction=False, + reading_order_machine_based=False, override_dpi=None, logger=None, pcgts=None, @@ -181,6 +182,7 @@ class Eynollah: self.dir_in = dir_in self.dir_of_all = dir_of_all self.dir_save_page = dir_save_page + self.reading_order_machine_based = reading_order_machine_based self.dir_of_deskewed = dir_of_deskewed self.dir_of_deskewed = dir_of_deskewed self.dir_of_cropped_images=dir_of_cropped_images @@ -226,7 +228,7 @@ class Eynollah: self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" - self.model_reading_order_machine_dir = dir_models + "/model_6_reading_order_machine_based" + self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -3139,8 +3141,6 @@ class Eynollah: Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - self.reading_order_machine_based = True#False#True#True t0_tot = time.time() From 514466883415f86ea90b6ef48a4e15187407ec05 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 17 Jul 2024 10:01:37 +0200 Subject: [PATCH 06/50] ocr engine first integration --- qurator/eynollah/cli.py | 8 + qurator/eynollah/eynollah.py | 295 ++++++++++++++++++++++++++++++++++- qurator/eynollah/writer.py | 15 +- 3 files changed, 313 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index a422df9..833e904 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -139,6 +139,12 @@ from qurator.eynollah.eynollah import Eynollah is_flag=True, help="if this parameter set to true, this tool would apply machine based reading order detection", ) +@click.option( + "--do_ocr", + "-ocr/-noocr", + 
is_flag=True, + help="if this parameter set to true, this tool will try to do ocr", +) @click.option( "--log-level", "-l", @@ -167,6 +173,7 @@ def main( headers_off, light_version, reading_order_machine_based, + do_ocr, ignore_page_extraction, log_level ): @@ -205,6 +212,7 @@ def main( light_version=light_version, ignore_page_extraction=ignore_page_extraction, reading_order_machine_based=reading_order_machine_based, + do_ocr=do_ocr, ) eynollah.run() #pcgts = eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 5e06734..a505b0e 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -17,6 +17,16 @@ import gc from ocrd_utils import getLogger import cv2 import numpy as np +from transformers import TrOCRProcessor +from PIL import Image +import torch +from difflib import SequenceMatcher as sq +from transformers import VisionEncoderDecoderModel +from numba import cuda +import copy +from scipy.signal import find_peaks +from scipy.ndimage import gaussian_filter1d + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" stderr = sys.stderr sys.stderr = open(os.devnull, "w") @@ -166,6 +176,7 @@ class Eynollah: light_version=False, ignore_page_extraction=False, reading_order_machine_based=False, + do_ocr=False, override_dpi=None, logger=None, pcgts=None, @@ -199,6 +210,7 @@ class Eynollah: self.headers_off = headers_off self.light_version = light_version self.ignore_page_extraction = ignore_page_extraction + self.ocr = do_ocr self.pcgts = pcgts if not dir_in: self.plotter = None if not enable_plotting else EynollahPlotter( @@ -233,6 +245,9 @@ class Eynollah: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" + if self.ocr: + self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" + self.model_tables = dir_models + "/eynollah-tables_20210319" self.models = {} @@ -251,6 +266,10 @@ class Eynollah: self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) + if self.ocr: + self.model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")#("microsoft/trocr-base-printed")#("microsoft/trocr-base-handwritten") self.ls_imgs = os.listdir(self.dir_in) @@ -3135,6 +3154,223 @@ class Eynollah: return order_of_texts, id_of_texts + def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot): + width = np.shape(textline_image)[1] + height = np.shape(textline_image)[0] + common_window = int(0.2*width) + + width1 = int ( width/2. - common_window ) + width2 = int ( width/2. 
+ common_window )
+
+        img_sum = np.sum(textline_image[:,:,0], axis=0)
+        sum_smoothed = gaussian_filter1d(img_sum, 3)
+
+        peaks_real, _ = find_peaks(sum_smoothed, height=0)
+
+        if len(peaks_real)>70:
+            print(len(peaks_real), 'len(peaks_real)')
+
+            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+
+            arg_sort = np.argsort(sum_smoothed[peaks_real])
+
+            arg_sort4 =arg_sort[::-1][:4]
+
+            peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
+
+            argsort_sorted = np.argsort(peaks_sort_4)
+
+            first_4_sorted = peaks_sort_4[argsort_sorted]
+            y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
+            #print(first_4_sorted,'first_4_sorted')
+
+            arg_sortnew = np.argsort(y_4_sorted)
+            peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] )
+
+            #plt.figure(ind_tot)
+            #plt.imshow(textline_image)
+            #plt.plot([peaks_final[0], peaks_final[0]], [0, height-1])
+            #plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
+            #plt.savefig('./'+str(ind_tot)+'.png')
+
+            return peaks_final[0], peaks_final[1]
+        else:
+            pass
+
+
+    def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(self,textline_image, ind_tot):
+        width = np.shape(textline_image)[1]
+        height = np.shape(textline_image)[0]
+        common_window = int(0.06*width)
+
+        width1 = int ( width/2. - common_window )
+        width2 = int ( width/2. + common_window )
+
+        img_sum = np.sum(textline_image[:,:,0], axis=0)
+        sum_smoothed = gaussian_filter1d(img_sum, 3)
+
+        peaks_real, _ = find_peaks(sum_smoothed, height=0)
+
+        if len(peaks_real)>70:
+            #print(len(peaks_real), 'len(peaks_real)')
+
+            peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+
+            arg_max = np.argmax(sum_smoothed[peaks_real])
+
+            peaks_final = peaks_real[arg_max]
+
+            #plt.figure(ind_tot)
+            #plt.imshow(textline_image)
+            #plt.plot([peaks_final, peaks_final], [0, height-1])
+            ##plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
+            #plt.savefig('./'+str(ind_tot)+'.png')
+
+            return peaks_final
+        else:
+            return None
+    def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(self,peaks_real, sum_smoothed, start_split, end_split):
+        peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)]
+
+        arg_sort = np.argsort(sum_smoothed[peaks_real])
+
+        arg_sort4 =arg_sort[::-1][:4]
+
+        peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
+
+        argsort_sorted = np.argsort(peaks_sort_4)
+
+        first_4_sorted = peaks_sort_4[argsort_sorted]
+        y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
+        #print(first_4_sorted,'first_4_sorted')
+
+        arg_sortnew = np.argsort(y_4_sorted)
+        peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] )
+        return peaks_final[0]
+
+    def return_start_and_end_of_common_text_of_textline_ocr_new(self,textline_image, ind_tot):
+        width = np.shape(textline_image)[1]
+        height = np.shape(textline_image)[0]
+        common_window = int(0.15*width)
+
+        width1 = int ( width/2. - common_window )
+        width2 = int ( width/2. + common_window )
+        mid = int(width/2.)
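
Note: the OCR helpers above and below share one idea: a textline crop is summed column-wise, the profile is smoothed, and peaks of the smoothed profile (bright gaps between characters or words) become candidate split points for overly wide lines. A minimal, self-contained sketch of that profile step (not part of the patch; the function name is hypothetical), assuming an RGB textline crop:

    import numpy as np
    from scipy.signal import find_peaks
    from scipy.ndimage import gaussian_filter1d

    def column_profile_peaks(textline_image, sigma=3):
        # Column-wise intensity sums peak where the line is mostly background.
        profile = np.sum(textline_image[:, :, 0], axis=0)
        smoothed = gaussian_filter1d(profile, sigma)
        peaks, _ = find_peaks(smoothed, height=0)
        return smoothed, peaks
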
+ + img_sum = np.sum(textline_image[:,:,0], axis=0) + sum_smoothed = gaussian_filter1d(img_sum, 3) + + peaks_real, _ = find_peaks(sum_smoothed, height=0) + + if len(peaks_real)>70: + peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, width1, mid+2) + + peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(peaks_real, sum_smoothed, mid-2, width2) + + #plt.figure(ind_tot) + #plt.imshow(textline_image) + #plt.plot([peak_start, peak_start], [0, height-1]) + #plt.plot([peak_end, peak_end], [0, height-1]) + #plt.savefig('./'+str(ind_tot)+'.png') + + return peak_start, peak_end + else: + pass + + def return_ocr_of_textline_without_common_section(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + + #width1 = int ( width/2. - common_window ) + #width2 = int ( width/2. + common_window ) + + + split_point = self.return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image, ind_tot) + if split_point: + image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height)) + + #pixel_values1 = processor(image1, return_tensors="pt").pixel_values + #pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values + generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device)) + generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True) + + #print(generated_text_merged,'generated_text_merged') + + #generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + #generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + #generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + #generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + + #generated_text = generated_text1 + ' ' + generated_text2 + generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1] + + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + else: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + #print(generated_text,'generated_text') + #print('########################################') + return generated_text + def return_ocr_of_textline(self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot): + if h2w_ratio > 0.05: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + else: + #width = np.shape(textline_image)[1] + #height = np.shape(textline_image)[0] + #common_window = int(0.3*width) + + #width1 = int ( width/2. 
- common_window ) + #width2 = int ( width/2. + common_window ) + + try: + width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot) + + image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height)) + image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height)) + + pixel_values1 = processor(image1, return_tensors="pt").pixel_values + pixel_values2 = processor(image2, return_tensors="pt").pixel_values + + generated_ids1 = model_ocr.generate(pixel_values1.to(device)) + generated_ids2 = model_ocr.generate(pixel_values2.to(device)) + + generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0] + generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0] + #print(generated_text1,'generated_text1') + #print(generated_text2, 'generated_text2') + #print('########################################') + + match = sq(None, generated_text1, generated_text2).find_longest_match(0, len(generated_text1), 0, len(generated_text2)) + + generated_text = generated_text1 + generated_text2[match.b+match.size:] + except: + pixel_values = processor(textline_image, return_tensors="pt").pixel_values + generated_ids = model_ocr.generate(pixel_values.to(device)) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return generated_text + + def return_textline_contour_with_added_box_coordinate(self, textline_contour, box_ind): + textline_contour[:,0] = textline_contour[:,0] + box_ind[2] + textline_contour[:,1] = textline_contour[:,1] + box_ind[0] + return textline_contour def run(self): """ @@ -3398,6 +3634,7 @@ class Eynollah: if self.plotter: self.plotter.write_images_into_directory(polygons_of_images, image_page) t_order = time.time() + if self.full_layout: if self.reading_order_machine_based: @@ -3425,11 +3662,67 @@ class Eynollah: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + + if self.ocr: + + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + img_poly_on_img = np.copy(image_page) + + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + + 
img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + ocr_textline_in_textregion.append(text_ocr) + + ##cv2.imwrite(str(ind_tot)+'.png', img_croped) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) + + else: + ocr_all_textlines = None + #print(ocr_all_textlines) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) + if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index f537f65..c69be9b 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -2,7 +2,7 @@ # pylint: disable=import-error from pathlib import Path import os.path - +import xml.etree.ElementTree as ET from .utils.xml import create_page_xml, xml_reading_order from .utils.counter import EynollahIdCounter @@ -12,6 +12,7 @@ from ocrd_models.ocrd_page import ( CoordsType, PcGtsType, TextLineType, + TextEquivType, TextRegionType, ImageRegionType, TableRegionType, @@ -93,11 +94,13 @@ class EynollahXmlWriter(): points_co += ' ' coords.set_points(points_co[:-1]) - def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter): + def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): self.logger.debug('enter serialize_lines_in_region') for j in range(len(all_found_textline_polygons[region_idx])): coords = CoordsType() textline = TextLineType(id=counter.next_line_id, Coords=coords) + if ocr_all_textlines_textregion: + textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] ) text_region.add_TextLine(textline) region_bboxes = all_box_coord[region_idx] points_co = '' @@ -140,7 +143,7 @@ class EynollahXmlWriter(): with open(out_fname, 'w') as f: f.write(to_xml(pcgts)) - def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables): + def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, 
all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines): self.logger.debug('enter build_pagexml_no_full_layout') # create the file structure @@ -159,7 +162,11 @@ class EynollahXmlWriter(): Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)), ) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', From a62ae370c3ff37495383f8415620dc2cf5d44eb1 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 02:21:01 +0200 Subject: [PATCH 07/50] new full layout model and early layout for 1&2 column images are integrated - light version --- qurator/eynollah/eynollah.py | 118 ++++++++++++++++++++++++++++++----- qurator/eynollah/writer.py | 16 ++++- 2 files changed, 114 insertions(+), 20 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index a505b0e..8032f1e 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -241,6 +241,8 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" + self.model_region_dir_p_1_2_sp_np = dir_models + "/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" if self.textline_light: self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" else: @@ -263,6 +265,8 @@ class Eynollah: self.model_bin = self.our_load_model(self.model_dir_of_binarization) self.model_textline = self.our_load_model(self.model_textline_dir) self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) + self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) + self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) @@ -1069,6 +1073,66 @@ class Eynollah: croped_page, page_coord = crop_image_inside_box(box, img) return croped_page, page_coord + def extract_text_regions_new(self, img, patches, cols): + self.logger.debug("enter extract_text_regions") + img_height_h = img.shape[0] + img_width_h = img.shape[1] + if not self.dir_in: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np) + else: + model_region = self.model_region_fl_new if patches else self.model_region_fl_np + + if not patches: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + prediction_regions2 = None + else: + if cols == 1: + img = 
otsu_copy_binary(img) + img = img.astype(np.uint8) + + img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000) + img = img.astype(np.uint8) + + if cols == 2: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300) + img = img.astype(np.uint8) + + if cols == 3: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600) + img = img.astype(np.uint8) + + if cols == 4: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) + img = img.astype(np.uint8) + + if cols == 5: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) + img = img.astype(np.uint8) + + if cols >= 6: + img = otsu_copy_binary(img) + img = img.astype(np.uint8) + img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) + img = img.astype(np.uint8) + + marginal_of_patch_percent = 0.1 + + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + + prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) + self.logger.debug("exit extract_text_regions") + return prediction_regions, prediction_regions + + def extract_text_regions(self, img, patches, cols): self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] @@ -1652,10 +1716,17 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(img_bin) if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + if num_col_classifier == 1 or num_col_classifier == 2: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + else: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + if num_col_classifier == 1 or num_col_classifier == 2: + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2828,24 +2899,32 @@ class Eynollah: text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 image_page = image_page.astype(np.uint8) - - regions_fully, regions_fully_only_drop = self.extract_text_regions(image_page, True, cols=num_col_classifier) - text_regions_p[:,:][regions_fully[:,:,0]==6]=6 - regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + + # 6 is the separators lable in old full layout model + # 4 is the drop capital class in old full layout model + # in the new full layout drop capital is 3 and separators are 5 + + text_regions_p[:,:][regions_fully[:,:,0]==5]=6 + regions_fully[:, :, 
0][regions_fully_only_drop[:, :, 0] == 3] = 4 + + #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 + #regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) + #regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) - regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) - if num_col_classifier > 2: - regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 - else: - regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) + ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) + ##if num_col_classifier > 2: + ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 + ##else: + ##regions_fully_np = filter_small_drop_capitals_from_no_patch_layout(regions_fully_np, text_regions_p) - regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) + ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 - text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 + ####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 #plt.imshow(text_regions_p) #plt.show() ####if not self.tables: @@ -3645,8 +3724,13 @@ class Eynollah: else: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml) + + if self.ocr: + ocr_all_textlines = [] + else: + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py index c69be9b..29caddc 100644 --- a/qurator/eynollah/writer.py +++ b/qurator/eynollah/writer.py @@ -208,7 +208,7 @@ class EynollahXmlWriter(): return pcgts - def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, 
polygons_lines_to_be_written_in_xml): + def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, ocr_all_textlines): self.logger.debug('enter build_pagexml_full_layout') # create the file structure @@ -225,14 +225,24 @@ class EynollahXmlWriter(): textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter) + + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord, all_box_coord, slopes, counter, ocr_textlines) self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) for mm in range(len(found_polygons_text_region_h)): textregion = TextRegionType(id=counter.next_region_id, type_='header', Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) page.add_TextRegion(textregion) - self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter) + + if ocr_all_textlines: + ocr_textlines = ocr_all_textlines[mm] + else: + ocr_textlines = None + self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter, ocr_textlines) for mm in range(len(found_polygons_marginals)): marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', From be144db9f83fbdd0bd345b89f5634b419e0fd919 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 18:13:10 +0200 Subject: [PATCH 08/50] updating 1&2 columns images + full layout --- qurator/eynollah/eynollah.py | 143 +++++++++++++++++++++-------- qurator/eynollah/utils/__init__.py | 14 ++- 2 files changed, 115 insertions(+), 42 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8032f1e..54e6e3b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1083,43 +1083,64 @@ class Eynollah: model_region = self.model_region_fl_new if patches else self.model_region_fl_np if not patches: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) prediction_regions2 = None else: if cols == 1: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1000 / float(img_width_h)), 1000) img = img.astype(np.uint8) if cols == 2: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1300 / float(img_width_h)), 1300) img = img.astype(np.uint8) if cols == 3: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = 
resize_image(img, int(img_height_h * 1600 / float(img_width_h)), 1600) img = img.astype(np.uint8) if cols == 4: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 1900 / float(img_width_h)), 1900) img = img.astype(np.uint8) if cols == 5: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 2200 / float(img_width_h)), 2200) img = img.astype(np.uint8) if cols >= 6: - img = otsu_copy_binary(img) + if self.light_version: + pass + else: + img = otsu_copy_binary(img) img = img.astype(np.uint8) img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500) img = img.astype(np.uint8) @@ -1611,6 +1632,7 @@ class Eynollah: img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) + #print(img.shape,'bin shape') if not self.dir_in: prediction_textline = self.do_prediction(patches, img, model_textline) else: @@ -1664,6 +1686,7 @@ class Eynollah: box_sub.put(boxes_sub_new) def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_light_v") + t_in = time.time() erosion_hurts = False img_org = np.copy(img) img_height_h = img_org.shape[0] @@ -1671,7 +1694,7 @@ class Eynollah: #model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens) - + #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: img_w_new = 1000 @@ -1711,9 +1734,12 @@ class Eynollah: #img= np.copy(prediction_bin) img_bin = np.copy(prediction_bin) - + #print("inside 1 ", time.time()-t_in) textline_mask_tot_ea = self.run_textline(img_bin) + + + #print("inside 2 ", time.time()-t_in) if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: @@ -1727,12 +1753,14 @@ class Eynollah: prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - + + #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) + img_bin = resize_image(img_bin,img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] @@ -1787,8 +1815,8 @@ class Eynollah: text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea + #print("inside 4 ", time.time()-t_in) + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -2553,7 +2581,11 @@ class Eynollah: prediction_table_erode = cv2.erode(prediction_table[:,:,0], KERNEL, iterations=20) prediction_table_erode = cv2.dilate(prediction_table_erode, KERNEL, iterations=20) return prediction_table_erode.astype(np.int16) - def run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts): + def 
run_graphics_and_columns_light(self, text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light): + + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') + #print(erosion_hurts, 'erosion_hurts') + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -2563,7 +2595,7 @@ class Eynollah: img_g3[:, :, 2] = img_g[:, :] image_page, page_coord, cont_page = self.extract_page() - + #print("inside graphics 1 ", time.time() - t_in_gr) if self.tables: table_prediction = self.get_tables_from_model(image_page, num_col_classifier) else: @@ -2574,6 +2606,9 @@ class Eynollah: text_regions_p_1 = text_regions_p_1[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = mask_images.astype(np.uint8) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10) @@ -2582,7 +2617,7 @@ class Eynollah: img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - + #print("inside graphics 2 ", time.time() - t_in_gr) if erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: @@ -2600,8 +2635,10 @@ class Eynollah: except Exception as why: self.logger.error(why) num_col = None - return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea + #print("inside graphics 3 ", time.time() - t_in_gr) + return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): + t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) @@ -2629,13 +2666,11 @@ class Eynollah: img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) - if erosion_hurts: img_only_regions = np.copy(img_only_regions_with_sep[:,:]) else: img_only_regions = cv2.erode(img_only_regions_with_sep[:,:], KERNEL, iterations=6) - try: num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col = num_col + 1 @@ -2682,6 +2717,7 @@ class Eynollah: return textline_mask_tot_ea def run_deskew(self, textline_mask_tot_ea): + #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') sigma = 2 main_page_deskew = True slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) @@ -2805,7 +2841,7 @@ class Eynollah: self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables - def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts): + def 
run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') if self.tables: @@ -2900,20 +2936,23 @@ class Eynollah: image_page = image_page.astype(np.uint8) - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + if self.light_version: + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) + else: + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 text_regions_p[:,:][regions_fully[:,:,0]==5]=6 - regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 + ###regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 3] = 4 #text_regions_p[:,:][regions_fully[:,:,0]==6]=6 - #regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) - #regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 - - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully) + ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) + ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 + drop_capital_label_in_full_layout_model = 3 + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -2923,7 +2962,7 @@ class Eynollah: ###regions_fully = boosting_headers_by_longshot_region_segmentation(regions_fully, regions_fully_np, img_only_regions) # plt.imshow(regions_fully[:,:,0]) # plt.show() - text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4 + text_regions_p[:, :][regions_fully[:, :, 0] == drop_capital_label_in_full_layout_model] = 4 ####text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4 #plt.imshow(text_regions_p) #plt.show() @@ -3463,22 +3502,41 @@ class Eynollah: self.ls_imgs = [1] for img_name in self.ls_imgs: + print(img_name) t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - + #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) + + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 1300 + 
img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -3498,7 +3556,7 @@ class Eynollah: continue else: return pcgts - + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -3513,17 +3571,20 @@ class Eynollah: textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) if self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts) + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - + #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 
0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -3625,13 +3686,16 @@ class Eynollah: # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) else: pass + + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - + #print("text region early 5 in %.1fs", time.time() - t0) if not self.curved_line: if self.light_version: if self.textline_light: @@ -3651,7 +3715,7 @@ class Eynollah: all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) @@ -3778,7 +3842,10 @@ class Eynollah: #print(x, y, w, h, h/float(w),'ratio') h2w_ratio = h/float(w) mask_poly = np.zeros(image_page.shape) - img_poly_on_img = np.copy(image_page) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) @@ -3805,8 +3872,10 @@ class Eynollah: pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) ##return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) + #print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index d2b2488..929669f 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -775,9 +775,8 @@ def put_drop_out_from_only_drop_model(layout_no_patch, layout1): return layout_no_patch -def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): - - drop_only = (layout_in_patch[:, :, 0] == 4) * 1 +def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop_capital_label): + drop_only = 
(layout_in_patch[:, :, 0] == drop_capital_label) * 1 contours_drop, hir_on_drop = return_contours_of_image(drop_only) contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop) @@ -786,13 +785,18 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch): contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.00001] - areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.001] + areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.00001] contours_drop_parent_final = [] for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - layout_in_patch[y : y + h, x : x + w, 0] = 4 + + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: + + layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label + else: + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label return layout_in_patch From 00bf2b64d016df86810ec2eed5799799c7a13fbd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 7 Aug 2024 19:07:54 +0200 Subject: [PATCH 09/50] 1&2 column images only printspace --- qurator/eynollah/eynollah.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 54e6e3b..3f078b0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3549,7 +3549,8 @@ class Eynollah: if not num_col: self.logger.info("No columns detected, outputting an empty PAGE-XML") - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], []) + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t1) if self.dir_in: self.writer.write_pagexml(pcgts) From e97677879638816ee12d0e1840b41e3e021ea9b2 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 14:33:01 +0200 Subject: [PATCH 10/50] testing pyproject.toml --- pyproject.toml | 30 ++++++++++++++++ qurator/eynollah/cli.py | 80 +++++++++++++++++++++++++---------------- requirements.txt | 8 ----- setup.py | 28 --------------- 4 files changed, 80 insertions(+), 66 deletions(-) create mode 100644 pyproject.toml delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..102f443 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "eynollah" +version = "1.2.3" + + + + +dependencies = [ + "ocrd >= 2.23.3", + "tensorflow >= 2.12.0", + "scikit-learn >= 0.23.2", + "imutils >= 0.5.3", + "numpy < 1.24.0", + "matplotlib", + "torch == 2.0.1", + "transformers == 4.30.2", + "numba == 0.58.1", +] + +[project.scripts] +eynollah = "qurator.eynollah.cli:main" + + +[tool.setuptools.packages.find] +where = ["."] +include = ["qurator"] diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 833e904..6c6561f 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -3,14 +3,60 @@ import click from ocrd_utils import initLogging, setOverrideLogLevel from qurator.eynollah.eynollah import Eynollah 
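# The hunk below restructures the CLI: the single top-level @click.command
# becomes a @click.group() named main, the previous behaviour moves into a
# layout subcommand, and a new machine_based_reading_order subcommand is
# stubbed in. The console entry point (eynollah = qurator.eynollah.cli:main)
# keeps working because the group itself is still the callable it points to.
# A minimal sketch of the pattern, independent of this patch, with the
# option set reduced for brevity:
import click

@click.group()
def main():
    pass

@main.command()
@click.option("--image", "-i", type=click.Path(exists=True, dir_okay=False))
def layout(image):
    click.echo("layout called on %s" % image)

if __name__ == "__main__":
    main()  # e.g. invoked as: eynollah layout -i page.png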
+@click.group() +def main(): + pass -@click.command() +@main.command() +@click.option( + "--dir_xml", + "-dx", + help="directory of GT page-xml files", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_out_modal_image", + "-domi", + help="directory where ground truth images would be written", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--dir_out_classes", + "-docl", + help="directory where ground truth classes would be written", + type=click.Path(exists=True, file_okay=False), +) + +@click.option( + "--input_height", + "-ih", + help="input height", +) +@click.option( + "--input_width", + "-iw", + help="input width", +) +@click.option( + "--min_area_size", + "-min", + help="min area size of regions considered for reading order training.", +) + +def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): + xml_files_ind = os.listdir(dir_xml) + + +@main.command() @click.option( "--image", "-i", help="image filename", type=click.Path(exists=True, dir_okay=False), ) + @click.option( "--out", "-o", @@ -146,37 +192,13 @@ from qurator.eynollah.eynollah import Eynollah help="if this parameter set to true, this tool will try to do ocr", ) @click.option( - "--log-level", + "--log_level", "-l", type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']), help="Override log level globally to this", ) -def main( - image, - out, - dir_in, - model, - save_images, - save_layout, - save_deskewed, - save_all, - save_page, - enable_plotting, - allow_enhancement, - curved_line, - textline_light, - full_layout, - tables, - right2left, - input_binary, - allow_scaling, - headers_off, - light_version, - reading_order_machine_based, - do_ocr, - ignore_page_extraction, - log_level -): + +def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, ignore_page_extraction, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -215,8 +237,6 @@ def main( do_ocr=do_ocr, ) eynollah.run() - #pcgts = eynollah.run() - ##eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 530dac2..0000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# ocrd includes opencv, numpy, shapely, click -ocrd >= 2.23.3 -numpy <1.24.0 -scikit-learn >= 0.23.2 -tensorflow >=2.12.0 -imutils >= 0.5.3 -matplotlib -setuptools >= 50 diff --git a/setup.py b/setup.py deleted file mode 100644 index 9abf158..0000000 --- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -from setuptools import setup, find_packages -from json import load - -install_requires = open('requirements.txt').read().split('\n') -with open('ocrd-tool.json', 'r', encoding='utf-8') as f: - version = load(f)['version'] - -setup( - name='eynollah', - version=version, - long_description=open('README.md').read(), - long_description_content_type='text/markdown', - author='Vahid Rezanezhad', - url='https://github.com/qurator-spk/eynollah', - license='Apache License 2.0', - namespace_packages=['qurator'], - packages=find_packages(exclude=['tests']), - install_requires=install_requires, - package_data={ - '': ['*.json'] - }, - entry_points={ - 'console_scripts': [ - 'eynollah=qurator.eynollah.cli:main', - 
'ocrd-eynollah-segment=qurator.eynollah.ocrd_cli:main', - ] - }, -) From 53fd5fb2a5da9a4c42bd1964a3ed1d2427f8637e Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 14:42:37 +0200 Subject: [PATCH 11/50] resolving #106 for pyproject.toml test --- qurator/eynollah/cli.py | 6 +++++- qurator/eynollah/eynollah.py | 9 ++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 6c6561f..b0f55cd 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -236,7 +236,11 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s reading_order_machine_based=reading_order_machine_based, do_ocr=do_ocr, ) - eynollah.run() + if dir_in: + eynollah.run() + else: + pcgts = eynollah.run() + eynollah.writer.write_pagexml(pcgts) if __name__ == "__main__": main() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 3f078b0..b27d269 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3797,7 +3797,8 @@ class Eynollah: pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts else: @@ -3872,9 +3873,11 @@ class Eynollah: self.logger.info("detection of reading order took %.1fs", time.time() - t_order) pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) self.logger.info("Job done in %.1fs", time.time() - t0) - ##return pcgts + if not self.dir_in: + return pcgts #print("text region early 7 in %.1fs", time.time() - t0) - self.writer.write_pagexml(pcgts) + if self.dir_in: + self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) #print("Job done in %.1fs", time.time() - t0) From 4c50479cb87cf6abf29f1ce8f907eb6814eedec0 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 14 Aug 2024 15:28:36 +0200 Subject: [PATCH 12/50] pyproject.toml may work for ocrd --- pyproject.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 102f443..c76f7e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "eynollah" -version = "1.2.3" +version = "0.1.0" @@ -23,8 +23,12 @@ dependencies = [ [project.scripts] eynollah = "qurator.eynollah.cli:main" +ocrd-eynollah-segment="qurator.eynollah.ocrd_cli:main" [tool.setuptools.packages.find] where = ["."] include = ["qurator"] + +[tool.setuptools.package-data] +"*" = ["*.json", '*.yml', '*.xml', '*.xsd'] From 74eac4daccd7e5bd9dc5644dc01ad54671671a10 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 Aug 2024 13:50:36 +0200 Subject: [PATCH 13/50] dtype = object in the case of length 1 arise error --- qurator/eynollah/eynollah.py | 25 
++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b27d269..b4e7276 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3599,7 +3599,10 @@ class Eynollah: contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + if len(contours_only_text_parent)>1: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + else: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) @@ -3614,7 +3617,10 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + if len(contours_only_text_parent_d)>1: + contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + else: + contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) @@ -3677,7 +3683,10 @@ class Eynollah: areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + if len(contours_only_text_parent)>1: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + else: + contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) @@ -3719,7 +3728,10 @@ class Eynollah: #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if len(contours_only_text_parent_d_ordered)>1: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, 
contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3809,7 +3821,10 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if len(contours_only_text_parent_d_ordered)>1: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + else: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) From 6f4205ba49e66ad99b1c18a95533d71447625faf Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 15 Aug 2024 16:08:45 +0200 Subject: [PATCH 14/50] update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c76f7e7..67544bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ version = "0.1.0" dependencies = [ "ocrd >= 2.23.3", - "tensorflow >= 2.12.0", + "tensorflow == 2.12.1", "scikit-learn >= 0.23.2", "imutils >= 0.5.3", "numpy < 1.24.0", From 4f8210de71935f9980c121f5eaae4df2722903d7 Mon Sep 17 00:00:00 2001 From: cneud <952378+cneud@users.noreply.github.com> Date: Thu, 15 Aug 2024 23:23:48 +0200 Subject: [PATCH 15/50] update Makefile model location --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 525e6c3..440b0bd 100644 --- a/Makefile +++ b/Makefile @@ -24,12 +24,14 @@ models: models_eynollah models_eynollah: models_eynollah.tar.gz # tar xf models_eynollah_renamed.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' # tar xf models_eynollah_renamed.tar.gz - tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + # tar xf 2022-04-05.SavedModel.tar.gz --transform 's/models_eynollah_renamed/models_eynollah/' + tar xf models_eynollah.tar.gz models_eynollah.tar.gz: # wget 'https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz' # wget 'https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz' - wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + # wget 'https://ocr-d.kba.cloud/2022-04-05.SavedModel.tar.gz' + wget https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz # Install with pip install: From c10a525675690076c1d029a483c0ff997c0c0e17 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 23 Aug 2024 02:18:16 +0200 Subject: [PATCH 16/50] inference with batch size bigger than 1 --- qurator/eynollah/eynollah.py | 172 ++++++++++++++++++++--------------- 1 file changed, 100 insertions(+), 72 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b4e7276..2bf57a4 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -548,11 +548,11 @@ class Eynollah: if self.input_binary: img = self.imread() if self.dir_in: - prediction_bin = self.do_prediction(True, img, self.model_bin) + prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5) else: model_bin, session_bin = 
self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img, model_bin) + prediction_bin = self.do_prediction(True, img, model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 @@ -703,7 +703,7 @@ class Eynollah: return model, None - def do_prediction(self, patches, img, model, marginal_of_patch_percent=0.1): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -745,7 +745,17 @@ class Eynollah: nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) - + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): if i == 0: @@ -766,59 +776,77 @@ class Eynollah: if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, 
index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + if batch_indexer == n_batch_inference: + + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + 
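                            # (Note on this scheme: patches are no longer predicted one at a
                            # time. Their pixels are collected into img_patch of shape
                            # (n_batch_inference, H, W, 3), their grid indices and window
                            # coordinates into the parallel lists list_i_s/list_j_s and
                            # list_x_*/list_y_*, and one model.predict() call handles the
                            # whole batch. The nine cases of this if/elif chain then apply a
                            # single write-back rule: a patch is cropped by `margin` on every
                            # side that borders a neighbouring patch and left uncropped on
                            # sides lying on the image border; corners keep two outer edges,
                            # edge patches keep one, interior patches keep none, so only the
                            # better-predicted patch centres land in prediction_true.)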
prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -835,7 +863,7 @@ class Eynollah: img = img / float(255.0) img = resize_image(img, img_height_model, img_width_model) - label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] @@ -1147,7 +1175,7 @@ class Eynollah: marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1173,7 +1201,7 @@ class Eynollah: img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.7), int(img_width_h * 0.7)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: @@ -1181,7 +1209,7 @@ class Eynollah: img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.4), int(img_width_h * 0.4)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) elif cols > 2: @@ -1189,7 +1217,7 @@ class Eynollah: img2 = img2.astype(np.uint8) img2 = resize_image(img2, int(img_height_h * 0.3), int(img_width_h * 0.3)) marginal_of_patch_percent = 0.1 - prediction_regions2 = self.do_prediction(patches, img2, model_region, 
marginal_of_patch_percent) + prediction_regions2 = self.do_prediction(patches, img2, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions2 = resize_image(prediction_regions2, img_height_h, img_width_h) if cols == 2: @@ -1245,7 +1273,7 @@ class Eynollah: img= resize_image(img, int(img_height_h * 0.9), int(img_width_h * 0.9)) marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 @@ -1634,9 +1662,9 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) #print(img.shape,'bin shape') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline) + prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=4) else: - prediction_textline = self.do_prediction(patches, img, self.model_textline) + prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=4) prediction_textline = resize_image(prediction_textline, img_h, img_w) if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1721,9 +1749,9 @@ class Eynollah: if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -1870,9 +1898,9 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0]), int(img_org.shape[1])) if self.dir_in: - prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, 0.2) + prediction_regions_org2 = self.do_prediction(True, img, self.model_region_p2, marginal_of_patch_percent=0.2) else: - prediction_regions_org2 = self.do_prediction(True, img, model_region, 0.2) + prediction_regions_org2 = self.do_prediction(True, img, model_region, marginal_of_patch_percent=0.2) prediction_regions_org2=resize_image(prediction_regions_org2, img_height_h, img_width_h ) @@ -1905,9 +1933,9 @@ class Eynollah: else: if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = self.do_prediction(True, img_org, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] @@ -1958,9 +1986,9 @@ class Eynollah: if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_org, model_bin) + prediction_bin = self.do_prediction(True, img_org, model_bin, 
n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_org, self.model_bin) + prediction_bin = self.do_prediction(True, img_org, self.model_bin, n_batch_inference=5) prediction_bin = resize_image(prediction_bin, img_height_h, img_width_h ) prediction_bin=prediction_bin[:,:,0] From 04e79002b3daa3f4e69921e6b94b3d0a6ee48639 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 24 Aug 2024 12:54:19 +0200 Subject: [PATCH 17/50] making light version faster for 1 and 2 columns images --- qurator/eynollah/eynollah.py | 88 ++++++++++++++++++------ qurator/eynollah/utils/separate_lines.py | 16 ++--- 2 files changed, 75 insertions(+), 29 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2bf57a4..640db16 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -28,6 +28,7 @@ from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +#os.environ['CUDA_VISIBLE_DEVICES'] = '-1' stderr = sys.stderr sys.stderr = open(os.devnull, "w") import tensorflow as tf @@ -299,17 +300,25 @@ class Eynollah: def _cache_images(self, image_filename=None, image_pil=None): ret = {} + t_c0 = time.time() if image_filename: ret['img'] = cv2.imread(image_filename) - self.dpi = check_dpi(image_filename) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_filename) else: ret['img'] = pil2cv(image_pil) - self.dpi = check_dpi(image_pil) + if self.light_version: + self.dpi = 100 + else: + self.dpi = check_dpi(image_pil) ret['img_grayscale'] = cv2.cvtColor(ret['img'], cv2.COLOR_BGR2GRAY) for prefix in ('', '_grayscale'): ret[f'img{prefix}_uint8'] = ret[f'img{prefix}'].astype(np.uint8) return ret def reset_file_name_dir(self, image_filename): + t_c = time.time() self._imgs = self._cache_images(image_filename=image_filename) self.image_filename = image_filename @@ -491,6 +500,27 @@ class Eynollah: num_column_is_classified = True return img_new, num_column_is_classified + + def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): + self.logger.debug("enter calculate_width_height_by_columns") + if num_col == 1: + img_w_new = 1300 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) + else: + img_w_new = 1500 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1500) + + if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: + img_new = np.copy(img) + num_column_is_classified = False + elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + img_new = np.copy(img) + num_column_is_classified = False + else: + img_new = resize_image(img, img_h_new, img_w_new) + num_column_is_classified = True + + return img_new, num_column_is_classified def resize_image_with_column_classifier(self, is_image_enhanced, img_bin): self.logger.debug("enter resize_image_with_column_classifier") @@ -600,16 +630,24 @@ class Eynollah: self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) if dpi < DPI_THRESHOLD: - img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + else: + img_new, num_column_is_classified = self.calculate_width_height_by_columns(img, num_col, width_early, label_p_pred) if light_version: image_res = np.copy(img_new) else: image_res = 
self.predict_enhancement(img_new) is_image_enhanced = True else: - num_column_is_classified = True - image_res = np.copy(img) - is_image_enhanced = False + if light_version and num_col in (1,2): + img_new, num_column_is_classified = self.calculate_width_height_by_columns_1_2(img, num_col, width_early, label_p_pred) + image_res = np.copy(img_new) + is_image_enhanced = True + else: + num_column_is_classified = True + image_res = np.copy(img) + is_image_enhanced = False self.logger.debug("exit resize_and_enhance_image_with_column_classifier") return is_image_enhanced, img, image_res, num_col, num_column_is_classified, img_bin @@ -1175,7 +1213,7 @@ class Eynollah: marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=4) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1280,7 +1318,10 @@ class Eynollah: def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") - num_cores = cpu_count() + if len(contours)>15: + num_cores = cpu_count() + else: + num_cores = 1 queue_of_all_params = Queue() processes = [] @@ -1554,8 +1595,6 @@ class Eynollah: mask_only_con_region = np.zeros(textline_mask_tot_ea.shape) mask_only_con_region = cv2.fillPoly(mask_only_con_region, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) - # plt.imshow(mask_only_con_region) - # plt.show() if self.textline_light: all_text_region_raw = np.copy(textline_mask_tot_ea) @@ -1660,11 +1699,11 @@ class Eynollah: img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - #print(img.shape,'bin shape') + #print(img.shape,'bin shape textline') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=3) else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=4) + prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=3) prediction_textline = resize_image(prediction_textline, img_h, img_w) if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1747,11 +1786,14 @@ class Eynollah: img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) + t_bin = time.time() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=10) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=10) + + #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2710,10 +2752,10 @@ class Eynollah: return num_col, num_col_classifier, 
img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction def run_enhancement(self,light_version): + t_in = time.time() self.logger.info("Resizing and enhancing image...") is_image_enhanced, img_org, img_res, num_col_classifier, num_column_is_classified, img_bin = self.resize_and_enhance_image_with_column_classifier(light_version) self.logger.info("Image was %senhanced.", '' if is_image_enhanced else 'not ') - scale = 1 if is_image_enhanced: if self.allow_enhancement: @@ -2731,6 +2773,7 @@ class Eynollah: if self.allow_scaling: img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced, img_bin) self.get_image_and_scales_after_enhancing(img_org, img_res) + #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified def run_textline(self, image_page): @@ -2748,7 +2791,8 @@ class Eynollah: #print(textline_mask_tot_ea.shape, 'textline_mask_tot_ea deskew') sigma = 2 main_page_deskew = True - slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter) + n_total_angles = 30 + slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, n_total_angles, main_page_deskew, plotter=self.plotter) slope_first = 0 if self.plotter: @@ -2871,7 +2915,7 @@ class Eynollah: def run_boxes_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light): self.logger.debug('enter run_boxes_full_layout') - + t_full0 = time.time() if self.tables: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: image_page_rotated_n,textline_mask_tot_d,text_regions_p_1_n , table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) @@ -2963,12 +3007,12 @@ class Eynollah: text_regions_p[:, :][text_regions_p[:, :] == 4] = 8 image_page = image_page.astype(np.uint8) - + #print("full inside 1", time.time()- t_full0) if self.light_version: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) else: regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) - + #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model # in the new full layout drop capital is 3 and separators are 5 @@ -3012,6 +3056,7 @@ class Eynollah: img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) self.logger.debug('exit run_boxes_full_layout') + #print("full inside 3", time.time()- t_full0) return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables def our_load_model(self, model_file): @@ -3534,6 +3579,7 @@ class Eynollah: t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) + #print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) @@ -3922,7 +3968,7 @@ class Eynollah: if self.dir_in: self.writer.write_pagexml(pcgts) 
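
The n_total_angles value threaded into return_deskew_slop above controls a brute-force projection-profile search: the eroded textline mask is rotated over a grid of candidate angles, each rotation is scored by the variance of its row-sum profile, and the best-scoring angle wins, since rows alternate between dense and empty only when lines run horizontally. A minimal self-contained sketch of that idea (score_angle and find_skew are illustrative names, not eynollah's API; the real function additionally refines the grid around its first estimate):

    import numpy as np
    import cv2

    def score_angle(mask, angle):
        # Rotate the binary textline mask; text running horizontally gives a
        # row-sum profile that alternates between dense and empty rows, which
        # maximizes the profile's variance at the true skew angle.
        h, w = mask.shape
        M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
        rotated = cv2.warpAffine(mask, M, (w, h))
        return np.var(rotated.sum(axis=1).astype(np.float64))

    def find_skew(mask, n_tot_angles=30, span=22.5):
        # A coarser grid (30 instead of 100 samples) trades a little angular
        # precision for a proportional speedup: each sample costs one rotation.
        angles = np.linspace(-span, span, n_tot_angles)
        return angles[int(np.argmax([score_angle(mask, a) for a in angles]))]
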
#self.logger.info("Job done in %.1fs", time.time() - t0) - #print("Job done in %.1fs", time.time() - t0) + print("Job done in %.1fs", time.time() - t0) if self.dir_in: self.logger.info("All jobs done in %.1fs", time.time() - t0_tot) diff --git a/qurator/eynollah/utils/separate_lines.py b/qurator/eynollah/utils/separate_lines.py index acdc2e9..1004a92 100644 --- a/qurator/eynollah/utils/separate_lines.py +++ b/qurator/eynollah/utils/separate_lines.py @@ -1569,7 +1569,7 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1626,7 +1626,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): ang_int=0 - angels=np.linspace(ang_int-22.5,ang_int+22.5,100) + angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) var_res=[] for rot in angels: @@ -1649,7 +1649,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): #plt.imshow(img_resized) #plt.show() - angels=np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) + angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) var_res=[] @@ -1680,7 +1680,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-12,100) + angels=np.linspace(-90,-12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1700,7 +1700,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,12,100) + angels=np.linspace(90,12,n_tot_angles) var_res=[] for rot in angels: img_rot=rotate_image(img_resized,rot) @@ -1719,7 +1719,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): except: ang_int=0 else: - angels=np.linspace(-25,25,60) + angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) var_res=[] indexer=0 for rot in angels: @@ -1749,7 +1749,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): early_slope_edge=22 if abs(ang_int)>early_slope_edge and ang_int<0: - angels=np.linspace(-90,-25,60) + angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) var_res=[] @@ -1772,7 +1772,7 @@ def return_deskew_slop(img_patch_org, sigma_des, main_page=False, plotter=None): elif abs(ang_int)>early_slope_edge and ang_int>0: - angels=np.linspace(90,25,60) + angels=np.linspace(90,25,int(n_tot_angles/2.)+10) var_res=[] From 7ae6a8776fb3cddc9279680f40fc23bc9b4df946 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 26 Aug 2024 16:02:10 +0200 Subject: [PATCH 18/50] ignoring dpi check by light version --- qurator/eynollah/eynollah.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 640db16..ff35d6f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -504,11 +504,11 @@ class Eynollah: def calculate_width_height_by_columns_1_2(self, img, num_col, width_early, label_p_pred): self.logger.debug("enter calculate_width_height_by_columns") if num_col == 1: + img_w_new = 1000 + img_h_new = int(img.shape[0] / float(img.shape[1]) * 1000) + else: img_w_new = 1300 
img_h_new = int(img.shape[0] / float(img.shape[1]) * 1300) - else: - img_w_new = 1500 - img_h_new = int(img.shape[0] / float(img.shape[1]) * 1500) if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) @@ -1213,7 +1213,7 @@ class Eynollah: marginal_of_patch_percent = 0.1 - prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=4) + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") @@ -1810,7 +1810,8 @@ class Eynollah: #print("inside 2 ", time.time()-t_in) - + + #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) From 93005959e54abf5f67def79868b8fd8d8831e287 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 27 Aug 2024 18:13:46 +0200 Subject: [PATCH 19/50] inference batch size debugged --- qurator/eynollah/eynollah.py | 71 +++++++++++++++++++++++++++++++++--- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index ff35d6f..f183dee 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -89,7 +89,7 @@ from .utils.xml import order_and_id_of_texts from .plot import EynollahPlotter from .writer import EynollahXmlWriter -MIN_AREA_REGION = 0.0005 +MIN_AREA_REGION = 0.00001 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -182,6 +182,7 @@ class Eynollah: logger=None, pcgts=None, ): + self.light_version = light_version if not dir_in: if image_pil: self._imgs = self._cache_images(image_pil=image_pil) @@ -209,7 +210,6 @@ class Eynollah: self.input_binary = input_binary self.allow_scaling = allow_scaling self.headers_off = headers_off - self.light_version = light_version self.ignore_page_extraction = ignore_page_extraction self.ocr = do_ocr self.pcgts = pcgts @@ -828,7 +828,64 @@ class Eynollah: batch_indexer = batch_indexer + 1 if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in 
+ 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) @@ -885,6 +942,7 @@ class Eynollah: batch_indexer = 0 img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + prediction_true = prediction_true.astype(np.uint8) #del model #gc.collect() @@ -1789,9 +1847,9 @@ class Eynollah: t_bin = time.time() if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=10) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=10) + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] @@ -1808,7 +1866,6 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(img_bin) - #print("inside 2 ", time.time()-t_in) #print(img_resized.shape, num_col_classifier, "num_col_classifier") @@ -1839,6 +1896,10 @@ class Eynollah: mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + mask_texts_only = mask_texts_only.astype('uint8') + + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + mask_images_only=(prediction_regions_org[:,:] ==2)*1 polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) From 0f87974b0c7a7bdfddd31ffa99b89c58c952ddcf Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 2 Sep 2024 16:21:07 
+0200 Subject: [PATCH 20/50] writing drop capitals in xml output + and may resolve issue #110 --- qurator/eynollah/eynollah.py | 23 ++++++++++++----------- qurator/eynollah/writer.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 13 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index f183dee..1bb0eff 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3735,9 +3735,9 @@ class Eynollah: contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - if len(contours_only_text_parent)>1: + try: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - else: + except: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -3753,10 +3753,11 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - if len(contours_only_text_parent_d)>1: + try: contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - else: + except: contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) @@ -3819,9 +3820,9 @@ class Eynollah: areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - if len(contours_only_text_parent)>1: + try: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - else: + except: contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) @@ -3864,10 +3865,10 @@ class Eynollah: #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - if len(contours_only_text_parent_d_ordered)>1: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - else: + try: contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + except: + contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3957,9 +3958,9 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = 
self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot)
             else:
-                if len(contours_only_text_parent_d_ordered)>1:
+                try:
                     contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con])
-                else:
+                except:
                     contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con])
                 order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
diff --git a/qurator/eynollah/writer.py b/qurator/eynollah/writer.py
index 29caddc..8eb1027 100644
--- a/qurator/eynollah/writer.py
+++ b/qurator/eynollah/writer.py
@@ -136,6 +136,29 @@ class EynollahXmlWriter():
             points_co += str(int((contour_textline[0][1] + region_bboxes[0]+page_coord[0])/self.scale_y))
             points_co += ' '
         coords.set_points(points_co[:-1])
+
+    def serialize_lines_in_dropcapital(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
+        self.logger.debug('enter serialize_lines_in_dropcapital')
+        for j in range(1):
+            coords = CoordsType()
+            textline = TextLineType(id=counter.next_line_id, Coords=coords)
+            if ocr_all_textlines_textregion:
+                textline.set_TextEquiv( [ TextEquivType(Unicode=ocr_all_textlines_textregion[j]) ] )
+            text_region.add_TextLine(textline)
+            #region_bboxes = all_box_coord[region_idx]
+            points_co = ''
+            for idx_contour_textline, contour_textline in enumerate(all_found_textline_polygons[j]):
+                if len(contour_textline) == 2:
+                    points_co += str(int((contour_textline[0] + page_coord[2]) / self.scale_x))
+                    points_co += ','
+                    points_co += str(int((contour_textline[1] + page_coord[0]) / self.scale_y))
+                else:
+                    points_co += str(int((contour_textline[0][0] + page_coord[2]) / self.scale_x))
+                    points_co += ','
+                    points_co += str(int((contour_textline[0][1] + page_coord[0])/self.scale_y))
+
+                points_co += ' '
+            coords.set_points(points_co[:-1])
 
     def write_pagexml(self, pcgts):
         out_fname = os.path.join(self.dir_out, self.image_filename_stem) + ".xml"
@@ -251,8 +274,12 @@ class EynollahXmlWriter():
             self.serialize_lines_in_marginal(marginal, all_found_textline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter)
 
         for mm in range(len(found_polygons_drop_capitals)):
-            page.add_TextRegion(TextRegionType(id=counter.next_region_id, type_='drop-capital',
-                                               Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord))))
+            dropcapital = TextRegionType(id=counter.next_region_id, type_='drop-capital',
+                                         Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))
+            page.add_TextRegion(dropcapital)
+            all_box_coord_drop = None
+            slopes_drop = None
+            self.serialize_lines_in_dropcapital(dropcapital, [found_polygons_drop_capitals[mm]], mm, page_coord, all_box_coord_drop, slopes_drop, counter, ocr_all_textlines_textregion=None)
 
         for mm in range(len(found_polygons_text_region_img)):
             page.add_ImageRegion(ImageRegionType(id=counter.next_region_id, Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_img[mm], page_coord))))

From c3a4a1bba77d40b9be8926483e40a1ccefe42198 Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Tue, 3 Sep 2024 13:14:10 +0200
Subject: [PATCH 21/50] resolving issue #110 in a better way

---
 qurator/eynollah/eynollah.py | 61 +++++++++++++++++++++++++++++++++++++------------------------
 1 file
changed, 37 insertions(+), 24 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 1bb0eff..c88f0f9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2357,7 +2357,6 @@ class Eynollah: arg_text_con = [] for ii in range(len(cx_text_only)): for jj in range(len(boxes)): - print(cx_text_only[ii],cy_text_only[ii],'markaz') if cx_text_only[ii] >= boxes[jj][0] and cx_text_only[ii] < boxes[jj][1] and cy_text_only[ii] >= boxes[jj][2] and cy_text_only[ii] < boxes[jj][3]: # this is valid if the center of region identify in which box it is located arg_text_con.append(jj) break @@ -3624,6 +3623,9 @@ class Eynollah: textline_contour[:,0] = textline_contour[:,0] + box_ind[2] textline_contour[:,1] = textline_contour[:,1] + box_ind[0] return textline_contour + def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): + return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + def run(self): """ @@ -3735,11 +3737,15 @@ class Eynollah: contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - try: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - except: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -3753,12 +3759,14 @@ class Eynollah: if len(areas_cnt_text_d)>0: contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] index_con_parents_d = np.argsort(areas_cnt_text_d) - try: - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - except: - contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) 
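
The helper introduced by this patch replaces the earlier try/except around NumPy fancy indexing. The underlying problem is that a list of OpenCV contours is ragged, and np.array over such a list behaves differently depending on whether the contours happen to share a shape. A standalone sketch of both failure modes (the areas fed to argsort are dummy values):

    import numpy as np

    contours = [np.zeros((4, 1, 2), np.int32), np.zeros((5, 1, 2), np.int32)]
    order = np.argsort([10.0, 2.0])               # -> array([1, 0])

    # Ragged shapes: dtype=object works, but the dtype=np.int32 fallback
    # raises ValueError -- which is what the old try/except papered over.
    reordered = list(np.array(contours, dtype=object)[order])

    # Equal shapes: NumPy builds a 4-D object array rather than a 1-D array
    # of contours, and every "contour" indexed out of it is an object-dtype
    # array that downstream cv2 calls reject.
    same = [np.zeros((4, 1, 2), np.int32)] * 2
    print(np.array(same, dtype=object).shape)     # (2, 4, 1, 2), not (2,)

    # The new helper's plain-Python reorder has neither pitfall:
    reordered_safe = [contours[i] for i in order]
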
cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) @@ -3820,11 +3828,14 @@ class Eynollah: areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] index_con_parents = np.argsort(areas_cnt_text_parent) - try: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - except: - contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) @@ -3865,10 +3876,11 @@ class Eynollah: #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - try: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - except: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) if self.light_version: text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: @@ -3958,10 +3970,11 @@ class Eynollah: if np.abs(slope_deskew) < SLOPE_THRESHOLD: order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) else: - try: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - except: - contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = 
list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) From f0b49073b7ba4746e1facd17cf8f8598e253b1d4 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 3 Sep 2024 23:10:38 +0200 Subject: [PATCH 22/50] adding option for textline detection in printspace --- qurator/eynollah/eynollah.py | 939 +++++++++++++++++++---------------- 1 file changed, 512 insertions(+), 427 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c88f0f9..533e2a0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -741,7 +741,7 @@ class Eynollah: return model, None - def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -774,7 +774,7 @@ class Eynollah: width_mid = img_width_model - 2 * margin height_mid = img_height_model - 2 * margin img = img / float(255.0) - img = img.astype(np.float16) + #img = img.astype(np.float16) img_h = img.shape[0] img_w = img.shape[1] prediction_true = np.zeros((img_h, img_w, 3)) @@ -832,6 +832,23 @@ class Eynollah: seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): seg_in = seg[indexer_inside_batch,:,:] @@ -889,6 +906,22 @@ class Eynollah: label_p_pred = model.predict(img_patch,verbose=0) seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -1202,9 +1235,9 @@ class Eynollah: img_height_h = img.shape[0] img_width_h = img.shape[1] if not self.dir_in: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np) else: - model_region = self.model_region_fl_new if patches else self.model_region_fl_np + model_region = self.model_region_fl if patches else self.model_region_fl_np if not patches: if self.light_version: @@ -1809,7 +1842,7 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def get_regions_light_v(self,img,is_image_enhanced, 
num_col_classifier): + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -1866,89 +1899,98 @@ class Eynollah: textline_mask_tot_ea = self.run_textline(img_bin) - #print("inside 2 ", time.time()-t_in) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - #print(img_resized.shape, num_col_classifier, "num_col_classifier") - if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) - else: + if not skip_layout_ro: + #print("inside 2 ", time.time()-t_in) + + #print(img_resized.shape, num_col_classifier, "num_col_classifier") + if not self.dir_in: + ###if num_col_classifier == 1 or num_col_classifier == 2: + ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + ###else: + ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) - else: - if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - - #print("inside 3 ", time.time()-t_in) - #plt.imshow(prediction_regions_org[:,:,0]) - #plt.show() + ##if num_col_classifier == 1 or num_col_classifier == 2: + ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + ##else: + ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - img_bin = resize_image(img_bin,img_height_h, img_width_h ) - - prediction_regions_org=prediction_regions_org[:,:,0] + #print("inside 3 ", time.time()-t_in) + #plt.imshow(prediction_regions_org[:,:,0]) + #plt.show() + + prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) - mask_lines_only = (prediction_regions_org[:,:] ==3)*1 - - mask_texts_only = (prediction_regions_org[:,:] ==1)*1 - - mask_texts_only = mask_texts_only.astype('uint8') - - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) - - mask_images_only=(prediction_regions_org[:,:] ==2)*1 - - polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) - - - test_khat = np.zeros(prediction_regions_org.shape) - - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - - #plt.imshow(test_khat[:,:]) 
- #plt.show() - - #for jv in range(1): - #print(jv, hir_lines_xml[0][232][3]) - #test_khat = np.zeros(prediction_regions_org.shape) + img_bin = resize_image(img_bin,img_height_h, img_width_h ) + + prediction_regions_org=prediction_regions_org[:,:,0] + + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 + + mask_texts_only = mask_texts_only.astype('uint8') - #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + + mask_images_only=(prediction_regions_org[:,:] ==2)*1 + + polygons_lines_xml, hir_lines_xml = return_contours_of_image(mask_lines_only) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) #plt.imshow(test_khat[:,:]) #plt.show() + #for jv in range(1): + #print(jv, hir_lines_xml[0][232][3]) + #test_khat = np.zeros(prediction_regions_org.shape) + + #test_khat = cv2.fillPoly(test_khat, pts = [polygons_lines_xml[232]], color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + - polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) - - - test_khat = np.zeros(prediction_regions_org.shape) - - test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) - - - #plt.imshow(test_khat[:,:]) - #plt.show() - #sys.exit() - - polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) - - polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) - - text_regions_p_true = np.zeros(prediction_regions_org.shape) - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) - - text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 - - text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - #print("inside 4 ", time.time()-t_in) - return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin + polygons_lines_xml = filter_contours_area_of_image(mask_lines_only, polygons_lines_xml, hir_lines_xml, max_area=1, min_area=0.00001) + + + test_khat = np.zeros(prediction_regions_org.shape) + + test_khat = cv2.fillPoly(test_khat, pts = polygons_lines_xml, color=(1,1,1)) + + + #plt.imshow(test_khat[:,:]) + #plt.show() + #sys.exit() + + polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) + + text_regions_p_true = np.zeros(prediction_regions_org.shape) + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_lines, color=(3,3,3)) + + text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 + + text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) + #print("inside 4 ", time.time()-t_in) + return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin + else: + img_bin = resize_image(img_bin,img_height_h, img_width_h ) + return None, erosion_hurts, None, textline_mask_tot_ea, img_bin def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier): self.logger.debug("enter get_regions_from_xy_2models") @@ -2392,8 +2434,6 @@ class Eynollah: ref_point += len(id_of_texts) order_of_texts_tot = [] - 
print(len(contours_only_text_parent),'contours_only_text_parent') - print(len(order_by_con_main),'order_by_con_main') for tj1 in range(len(contours_only_text_parent)): order_of_texts_tot.append(int(order_by_con_main[tj1])) @@ -2768,6 +2808,28 @@ class Eynollah: num_col = None #print("inside graphics 3 ", time.time() - t_in_gr) return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light + + def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light): + + #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics') + #print(erosion_hurts, 'erosion_hurts') + t_in_gr = time.time() + img_g = self.imread(grayscale=True, uint8=True) + + img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3)) + img_g3 = img_g3.astype(np.uint8) + img_g3[:, :, 0] = img_g[:, :] + img_g3[:, :, 1] = img_g[:, :] + img_g3[:, :, 2] = img_g[:, :] + + image_page, page_coord, cont_page = self.extract_page() + #print("inside graphics 1 ", time.time() - t_in_gr) + + textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts): t_in_gr = time.time() img_g = self.imread(grayscale=True, uint8=True) @@ -3632,6 +3694,8 @@ class Eynollah: Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") + + skip_layout_ro = True t0_tot = time.time() @@ -3649,398 +3713,419 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if self.light_version: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) - - if num_col_classifier == 1 or num_col_classifier ==2: - if num_col_classifier == 1: - img_w_new = 1000 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - elif num_col_classifier == 2: - img_w_new = 1300 - img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) - - textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + if not skip_layout_ro: + if self.light_version: + text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) + #print("text region early -2 in %.1fs", time.time() - t0) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + if num_col_classifier == 1 or num_col_classifier ==2: + if num_col_classifier == 1: + img_w_new = 1000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 1300 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + else: + slope_deskew, slope_first = 
self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) + #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ + self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) + #self.logger.info("run graphics %.1fs ", time.time() - t1t) + #print("text region early -3 in %.1fs", time.time() - t0) + textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) + #print("text region early -4 in %.1fs", time.time() - t0) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) - #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ - self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) - #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) - textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) - else: - text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) - self.logger.info("Textregion detection took %.1fs ", time.time() - t1) - + text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) + self.logger.info("Textregion detection took %.1fs ", time.time() - t1) + + t1 = time.time() + num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ + self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) + self.logger.info("Graphics detection took %.1fs ", time.time() - t1) + #self.logger.info('cont_page %s', cont_page) + + if not num_col: + self.logger.info("No columns detected, outputting an empty PAGE-XML") + ocr_all_textlines = None + pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t1) + if self.dir_in: + self.writer.write_pagexml(pcgts) + continue + else: + return pcgts + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() - num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction = \ - self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts) - self.logger.info("Graphics detection took %.1fs ", time.time() - t1) - #self.logger.info('cont_page %s', cont_page) - - if not num_col: - self.logger.info("No columns detected, outputting an empty PAGE-XML") - ocr_all_textlines = None - pcgts = self.writer.build_pagexml_no_full_layout([], page_coord, [], [], [], [], [], [], [], [], [], [], cont_page, [], [], ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - 
t1) - if self.dir_in: - self.writer.write_pagexml(pcgts) - continue - else: - return pcgts - #print("text region early in %.1fs", time.time() - t0) - t1 = time.time() - if not self.light_version: - textline_mask_tot_ea = self.run_textline(image_page) - self.logger.info("textline detection took %.1fs", time.time() - t1) + if not self.light_version: + textline_mask_tot_ea = self.run_textline(image_page) + self.logger.info("textline detection took %.1fs", time.time() - t1) + t1 = time.time() + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + self.logger.info("deskewing took %.1fs", time.time() - t1) t1 = time.time() - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - self.logger.info("deskewing took %.1fs", time.time() - t1) - t1 = time.time() - #plt.imshow(table_prediction) - #plt.show() - - textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) - self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) - t1 = time.time() - if not self.full_layout: - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + #plt.imshow(table_prediction) + #plt.show() - if self.full_layout: - if not self.light_version: - img_bin_light = None - polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - text_only = ((img_revised_tab[:, :] == 1)) * 1 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - - #print("text region early 2 in %.1fs", time.time() - t0) - ###min_con_area = 0.000005 - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - #self.logger.info('areas_cnt_text %s', areas_cnt_text) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) - - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + self.logger.info("detection of marginals took %.1fs", time.time() - t1) + #print("text region early 2 marginal in %.1fs", time.time() - t0) 
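
The re-indentation running through this hunk is the heart of the patch: the whole layout pipeline now lives under the `not skip_layout_ro` branch, while the skip path (hardcoded on via skip_layout_ro = True at the top of run()) stops after binarization and textline detection. Condensed control flow with bodies elided; process_page is an illustrative wrapper, and the pairing with the new run_graphics_and_columns_without_layout is assumed from its addition earlier in this patch:

    def process_page(eyn, img_res, num_col_classifier, skip_layout_ro):
        if skip_layout_ro:
            # Fast path: region prediction is skipped entirely, so the
            # layout outputs come back as None; only the page-cropped
            # textline mask and binarized image survive.
            _, erosion_hurts, _, textline_mask, img_bin = eyn.get_regions_light_v(
                img_res, False, num_col_classifier, skip_layout_ro=True)
            return eyn.run_graphics_and_columns_without_layout(textline_mask, img_bin)
        # Full path: regions, deskew, marginals, boxes, reading order, PAGE-XML.
        regions, erosion_hurts, lines_xml, textline_mask, img_bin = \
            eyn.get_regions_light_v(img_res, False, num_col_classifier)
        ...
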
+ t1 = time.time() + if not self.full_layout: + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) + + if self.full_layout: + if not self.light_version: + img_bin_light = None + polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + text_only = ((img_revised_tab[:, :] == 1)) * 1 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 + + #print("text region early 2 in %.1fs", time.time() - t0) + ###min_con_area = 0.000005 + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + #self.logger.info('areas_cnt_text %s', areas_cnt_text) + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - ##try: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - ##except: - ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + ##try: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + ##except: + ##contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + ##areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) - contours_only_text_parent_d = return_parent_contours(contours_only_text_d, hir_on_text_d) + contours_only_text_d, hir_on_text_d = return_contours_of_image(text_only_d) + contours_only_text_parent_d = 
return_parent_contours(contours_only_text_d, hir_on_text_d) - areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) - areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) - - if len(areas_cnt_text_d)>0: - contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] - index_con_parents_d = np.argsort(areas_cnt_text_d) - contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) - #try: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) - #except: - #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - - #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) - areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) - - cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) - cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) - try: - if len(cx_bigest_d) >= 5: - cx_bigest_d_last5 = cx_bigest_d[-5:] - cy_biggest_d_last5 = cy_biggest_d[-5:] - dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) - else: - cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] - cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] - dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] - ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) + areas_cnt_text_d = areas_cnt_text_d / float(text_only_d.shape[0] * text_only_d.shape[1]) + + if len(areas_cnt_text_d)>0: + contours_biggest_d = contours_only_text_parent_d[np.argmax(areas_cnt_text_d)] + index_con_parents_d = np.argsort(areas_cnt_text_d) + contours_only_text_parent_d = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d, index_con_parents_d) + #try: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=object)[index_con_parents_d]) + #except: + #contours_only_text_parent_d = list(np.array(contours_only_text_parent_d,dtype=np.int32)[index_con_parents_d]) - cx_bigest_d_big[0] = cx_bigest_d[ind_largest] - cy_biggest_d_big[0] = cy_biggest_d[ind_largest] - except Exception as why: - self.logger.error(why) - - (h, w) = text_only.shape[:2] - center = (w // 2.0, h // 2.0) - M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) - M_22 = np.array(M)[:2, :2] - p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) - x_diff = p_big[0] - cx_bigest_d_big - y_diff = p_big[1] - cy_biggest_d_big - - contours_only_text_parent_d_ordered = [] - for i in range(len(contours_only_text_parent)): - p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) - p[0] = p[0] - x_diff[0] - p[1] = p[1] - y_diff[0] - dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] - contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) - # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) - # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] 
,color=(1,1,1)) - # plt.imshow(img2[:,:,0]) - # plt.show() + #areas_cnt_text_d = list(np.array(areas_cnt_text_d)[index_con_parents_d]) + areas_cnt_text_d = self.return_list_of_contours_with_desired_order(areas_cnt_text_d, index_con_parents_d) + + cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) + cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) + try: + if len(cx_bigest_d) >= 5: + cx_bigest_d_last5 = cx_bigest_d[-5:] + cy_biggest_d_last5 = cy_biggest_d[-5:] + dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) + else: + cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):] + cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):] + dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))] + ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d) + + cx_bigest_d_big[0] = cx_bigest_d[ind_largest] + cy_biggest_d_big[0] = cy_biggest_d[ind_largest] + except Exception as why: + self.logger.error(why) + + (h, w) = text_only.shape[:2] + center = (w // 2.0, h // 2.0) + M = cv2.getRotationMatrix2D(center, slope_deskew, 1.0) + M_22 = np.array(M)[:2, :2] + p_big = np.dot(M_22, [cx_bigest_big, cy_biggest_big]) + x_diff = p_big[0] - cx_bigest_d_big + y_diff = p_big[1] - cy_biggest_d_big + + contours_only_text_parent_d_ordered = [] + for i in range(len(contours_only_text_parent)): + p = np.dot(M_22, [cx_bigest[i], cy_biggest[i]]) + p[0] = p[0] - x_diff[0] + p[1] = p[1] - y_diff[0] + dists = [math.sqrt((p[0] - cx_bigest_d[j]) ** 2 + (p[1] - cy_biggest_d[j]) ** 2) for j in range(len(cx_bigest_d))] + contours_only_text_parent_d_ordered.append(contours_only_text_parent_d[np.argmin(dists)]) + # img2=np.zeros((text_only.shape[0],text_only.shape[1],3)) + # img2=cv2.fillPoly(img2,pts=[contours_only_text_parent_d[np.argmin(dists)]] ,color=(1,1,1)) + # plt.imshow(img2[:,:,0]) + # plt.show() + else: + contours_only_text_parent_d_ordered = [] + contours_only_text_parent_d = [] + contours_only_text_parent = [] + else: contours_only_text_parent_d_ordered = [] contours_only_text_parent_d = [] contours_only_text_parent = [] - else: - contours_only_text_parent_d_ordered = [] - contours_only_text_parent_d = [] - contours_only_text_parent = [] - else: - contours_only_text, hir_on_text = return_contours_of_image(text_only) - contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) - - if len(contours_only_text_parent) > 0: - areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) - areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) + contours_only_text, hir_on_text = return_contours_of_image(text_only) + contours_only_text_parent = return_parent_contours(contours_only_text, hir_on_text) + + if len(contours_only_text_parent) > 0: + areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) + areas_cnt_text = areas_cnt_text / float(text_only.shape[0] * text_only.shape[1]) - contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] - contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] - areas_cnt_text_parent = [area for area in areas_cnt_text if area > 
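The matching loop above establishes which deskewed contour corresponds to each original region: the original centroids are rotated with the 2x2 part of the deskew transform, shifted by an offset measured on the largest region, and paired with the nearest deskewed centroid. A self-contained sketch of that correspondence step, under the same assumptions and with illustrative names:

import math
import cv2
import numpy as np

def match_regions_after_deskew(centroids, centroids_deskewed, page_shape,
                               slope_deskew, anchor, anchor_deskewed):
    # 2x2 rotation part of the affine deskew transform around the page center
    h, w = page_shape[:2]
    M = cv2.getRotationMatrix2D((w // 2.0, h // 2.0), slope_deskew, 1.0)
    M22 = np.array(M)[:2, :2]
    # Translation estimated from one region visible in both images
    # (the largest region plays the role of the anchor)
    offset = np.dot(M22, anchor) - np.array(anchor_deskewed, dtype=float)
    matches = []
    for cx, cy in centroids:
        # Rotate, shift, and pick the nearest deskewed centroid
        p = np.dot(M22, [cx, cy]) - offset
        dists = [math.hypot(p[0] - dx, p[1] - dy) for dx, dy in centroids_deskewed]
        matches.append(int(np.argmin(dists)))
    return matches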
MIN_AREA_REGION] + contours_biggest = contours_only_text_parent[np.argmax(areas_cnt_text)] + contours_only_text_parent = [c for jz, c in enumerate(contours_only_text_parent) if areas_cnt_text[jz] > MIN_AREA_REGION] + areas_cnt_text_parent = [area for area in areas_cnt_text if area > MIN_AREA_REGION] - index_con_parents = np.argsort(areas_cnt_text_parent) + index_con_parents = np.argsort(areas_cnt_text_parent) + + contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) + #try: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) + #except: + #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) + #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) + areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) + + cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) + cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) + #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) + # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) + # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) + else: + pass - contours_only_text_parent = self.return_list_of_contours_with_desired_order(contours_only_text_parent, index_con_parents) - #try: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=object)[index_con_parents]) - #except: - #contours_only_text_parent = list(np.array(contours_only_text_parent,dtype=np.int32)[index_con_parents]) - #areas_cnt_text_parent = list(np.array(areas_cnt_text_parent)[index_con_parents]) - areas_cnt_text_parent = self.return_list_of_contours_with_desired_order(areas_cnt_text_parent, index_con_parents) - - cx_bigest_big, cy_biggest_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest]) - cx_bigest, cy_biggest, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent) - #self.logger.debug('areas_cnt_text_parent %s', areas_cnt_text_parent) - # self.logger.debug('areas_cnt_text_parent_d %s', areas_cnt_text_parent_d) - # self.logger.debug('len(contours_only_text_parent) %s', len(contours_only_text_parent_d)) - else: - pass - - #print("text region early 3 in %.1fs", time.time() - t0) - if self.light_version: - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - else: - txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) - boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) - boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) - if not self.curved_line: + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - if self.textline_light: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, 
all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - else: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) - slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - - else: - - scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) - all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) - if self.full_layout: - if np.abs(slope_deskew) >= SLOPE_THRESHOLD: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, 
all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) else: - #takes long timee - contours_only_text_parent_d_ordered = None + txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) + #print("text region early 4 in %.1fs", time.time() - t0) + boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) + boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) + #print("text region early 5 in %.1fs", time.time() - t0) + if not self.curved_line: if self.light_version: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + if self.textline_light: + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + else: + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: - text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) + slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, 
polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) - if self.plotter: - self.plotter.save_plot_of_layout(text_regions_p, image_page) - self.plotter.save_plot_of_layout_all(text_regions_p, image_page) - - pixel_img = 4 - polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) - all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) - pixel_lines = 6 - - if not self.reading_order_machine_based: - if not self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + + scale_param = 1 + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) + #print("text region early 6 in %.1fs", time.time() - t0) + if self.full_layout: + if np.abs(slope_deskew) >= SLOPE_THRESHOLD: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) - elif self.headers_off: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - num_col, _, matrix_of_lines_ch, splitter_y_new, _ = 
find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) + else: + #takes long timee + contours_only_text_parent_d_ordered = None + if self.light_version: + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header_light(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) else: - _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_textline_polygons, slopes, contours_only_text_parent_d_ordered) - if num_col_classifier >= 3: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - regions_without_separators = regions_without_separators.astype(np.uint8) - regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + if self.plotter: + self.plotter.save_plot_of_layout(text_regions_p, image_page) + self.plotter.save_plot_of_layout_all(text_regions_p, image_page) + + pixel_img = 4 + polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) + all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h, kernel=KERNEL, curved_line=self.curved_line) + pixel_lines = 6 + + if not self.reading_order_machine_based: + if not self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h) + else: + _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines, contours_only_text_parent_h_d_ordered) + elif self.headers_off: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + num_col, _, matrix_of_lines_ch, splitter_y_new, _ = find_number_of_columns_in_document(np.repeat(text_regions_p[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + else: + _, _, 
matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) + if num_col_classifier >= 3: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + regions_without_separators = regions_without_separators.astype(np.uint8) + regions_without_separators = cv2.erode(regions_without_separators[:, :], KERNEL, iterations=6) + + else: + regions_without_separators_d = regions_without_separators_d.astype(np.uint8) + regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + + if not self.reading_order_machine_based: + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) else: - regions_without_separators_d = regions_without_separators_d.astype(np.uint8) - regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) + + if self.plotter: + self.plotter.write_images_into_directory(polygons_of_images, image_page) + t_order = time.time() - if not self.reading_order_machine_based: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) + if self.full_layout: + + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) - - #print(boxes_d,'boxes_d') - #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1])) - #for box_i in boxes_d: - #img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1 - #plt.imshow(img_once) - #plt.show() - #print(np.unique(img_once),'img_once') - if self.plotter: - self.plotter.write_images_into_directory(polygons_of_images, image_page) - t_order = time.time() + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - if self.full_layout: - - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) - else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + if self.ocr: + ocr_all_textlines = [] else: - order_text_new, id_of_texts_tot 
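For pages classified with three or more columns, the separator-free region mask is eroded hard before the column split, so that text blocks of neighbouring columns stop touching. The step in isolation:

import cv2
import numpy as np

# Stand-in for the module-level kernel; its exact shape is an assumption here
KERNEL = np.ones((5, 5), np.uint8)

def loosen_columns(regions_without_separators, iterations=6):
    # Six erosion passes thin the blocks enough that the x-axis splitting
    # used for reading order sees clear gaps between columns
    mask = regions_without_separators.astype(np.uint8)
    return cv2.erode(mask, KERNEL, iterations=iterations)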
= self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - - if self.ocr: - ocr_all_textlines = [] - else: - ocr_all_textlines = None + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + - pcgts = self.writer.build_pagexml_full_layout(contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h, polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_h, slopes_marginals, cont_page, polygons_lines_xml, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) - if not self.dir_in: - return pcgts - - - else: - contours_only_text_parent_h = None - if self.reading_order_machine_based: - order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - if np.abs(slope_deskew) < SLOPE_THRESHOLD: - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + contours_only_text_parent_h = None + if self.reading_order_machine_based: + order_text_new, id_of_texts_tot = self.do_order_of_regions_with_machine_optimized_algorithm(contours_only_text_parent, contours_only_text_parent_h, text_regions_p) else: - contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) - #try: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) - #except: - #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) - order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) - + if np.abs(slope_deskew) < SLOPE_THRESHOLD: + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot) + else: + contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) + #try: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=object)[index_by_text_par_con]) + #except: + #contours_only_text_parent_d_ordered = list(np.array(contours_only_text_parent_d_ordered, dtype=np.int32)[index_by_text_par_con]) + order_text_new, id_of_texts_tot = self.do_order_of_regions(contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d) + - if self.ocr: + 
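When the machine-based model is off, do_order_of_regions orders the regions against the boxes computed by return_boxes_of_images_by_order_of_reading_new. The heuristic can be pictured as assigning each region to the reading-order box that contains its centroid and sorting top-down inside each box; this is only an illustrative reduction, not the function's actual body:

def order_by_boxes(centroids, boxes):
    # boxes are assumed as (x_min, x_max, y_min, y_max) tuples, already in
    # reading order; regions outside every box are appended at the end
    keys = []
    for idx, (cx, cy) in enumerate(centroids):
        for b, (x0, x1, y0, y1) in enumerate(boxes):
            if x0 <= cx < x1 and y0 <= cy < y1:
                keys.append((b, cy, idx))
                break
        else:
            keys.append((len(boxes), cy, idx))
    return [idx for _, _, idx in sorted(keys)]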
if self.ocr: - device = cuda.get_current_device() - device.reset() - gc.collect() - model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") - torch.cuda.empty_cache() - model_ocr.to(device) - - ind_tot = 0 - #cv2.imwrite('./img_out.png', image_page) - - ocr_all_textlines = [] - for indexing, ind_poly_first in enumerate(all_found_textline_polygons): - ocr_textline_in_textregion = [] - for indexing2, ind_poly in enumerate(ind_poly_first): - if not (self.textline_light or self.curved_line): - ind_poly = copy.deepcopy(ind_poly) - box_ind = all_box_coord[indexing] - #print(ind_poly,np.shape(ind_poly), 'ind_poly') - #print(box_ind) - ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) - #print(ind_poly_copy) - ind_poly[ind_poly<0] = 0 - x, y, w, h = cv2.boundingRect(ind_poly) - #print(ind_poly_copy, np.shape(ind_poly_copy)) - #print(x, y, w, h, h/float(w),'ratio') - h2w_ratio = h/float(w) - mask_poly = np.zeros(image_page.shape) - if not self.light_version: - img_poly_on_img = np.copy(image_page) - else: - img_poly_on_img = np.copy(img_bin_light) + device = cuda.get_current_device() + device.reset() + gc.collect() + model_ocr = VisionEncoderDecoderModel.from_pretrained(self.model_ocr_dir) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed") + torch.cuda.empty_cache() + model_ocr.to(device) + + ind_tot = 0 + #cv2.imwrite('./img_out.png', image_page) + + ocr_all_textlines = [] + for indexing, ind_poly_first in enumerate(all_found_textline_polygons): + ocr_textline_in_textregion = [] + for indexing2, ind_poly in enumerate(ind_poly_first): + if not (self.textline_light or self.curved_line): + ind_poly = copy.deepcopy(ind_poly) + box_ind = all_box_coord[indexing] + #print(ind_poly,np.shape(ind_poly), 'ind_poly') + #print(box_ind) + ind_poly = self.return_textline_contour_with_added_box_coordinate(ind_poly, box_ind) + #print(ind_poly_copy) + ind_poly[ind_poly<0] = 0 + x, y, w, h = cv2.boundingRect(ind_poly) + #print(ind_poly_copy, np.shape(ind_poly_copy)) + #print(x, y, w, h, h/float(w),'ratio') + h2w_ratio = h/float(w) + mask_poly = np.zeros(image_page.shape) + if not self.light_version: + img_poly_on_img = np.copy(image_page) + else: + img_poly_on_img = np.copy(img_bin_light) - mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) - - if self.textline_light: - mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) - - img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 - img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1)) + + if self.textline_light: + mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1) + + img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255 + img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255 + + img_croped = img_poly_on_img[y:y+h, x:x+w, :] + text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + + ocr_textline_in_textregion.append(text_ocr) - img_croped = img_poly_on_img[y:y+h, x:x+w, :] - text_ocr = self.return_ocr_of_textline_without_common_section(img_croped, model_ocr, processor, device, w, h2w_ratio, ind_tot) + 
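Each textline is recognized in isolation: the code above whitens every pixel outside the line polygon, crops the bounding box, and hands the crop to TrOCR. The core recognition call, reduced to its essentials (the public checkpoint stands in here for the weights the patch loads from self.model_ocr_dir):

import torch
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

def ocr_crop(img_crop):
    # img_crop: RGB numpy array of one textline, background forced to white.
    # Splitting of very wide lines (the "without common section" logic) is omitted.
    pixel_values = processor(Image.fromarray(img_crop), return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values.to(device))
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]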
##cv2.imwrite(str(ind_tot)+'.png', img_croped) + ind_tot = ind_tot +1 + ocr_all_textlines.append(ocr_textline_in_textregion) - ocr_textline_in_textregion.append(text_ocr) - - ##cv2.imwrite(str(ind_tot)+'.png', img_croped) - ind_tot = ind_tot +1 - ocr_all_textlines.append(ocr_textline_in_textregion) - - else: - ocr_all_textlines = None - #print(ocr_all_textlines) - self.logger.info("detection of reading order took %.1fs", time.time() - t_order) - pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) - self.logger.info("Job done in %.1fs", time.time() - t0) + else: + ocr_all_textlines = None + #print(ocr_all_textlines) + self.logger.info("detection of reading order took %.1fs", time.time() - t_order) + pcgts = self.writer.build_pagexml_no_full_layout(txt_con_org, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) + self.logger.info("Job done in %.1fs", time.time() - t0) + if not self.dir_in: + return pcgts + #print("text region early 7 in %.1fs", time.time() - t0) + else: + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro) + + page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + all_found_textline_polygons=[ all_found_textline_polygons ] + order_text_new = [0] + slopes =[0] + id_of_texts_tot =['region_0001'] + + polygons_of_images = [] + slopes_marginals = [] + polygons_of_marginals = [] + all_found_textline_polygons_marginals = [] + all_box_coord_marginals = [] + polygons_lines_xml = [] + contours_tables = [] + ocr_all_textlines = None + + pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines) if not self.dir_in: return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) + if self.dir_in: self.writer.write_pagexml(pcgts) #self.logger.info("Job done in %.1fs", time.time() - t0) From 2c939049854c73c7dc27e4b04863c8498d654129 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 12 Sep 2024 17:35:28 +0200 Subject: [PATCH 23/50] avoiding double binarization --- qurator/eynollah/eynollah.py | 155 +++++++++++++++++++---------- qurator/eynollah/utils/__init__.py | 4 +- 2 files changed, 106 insertions(+), 53 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 533e2a0..569aec5 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -89,7 +89,7 @@ from .utils.xml 
import order_and_id_of_texts from .plot import EynollahPlotter from .writer import EynollahXmlWriter -MIN_AREA_REGION = 0.00001 +MIN_AREA_REGION = 0.000001 SLOPE_THRESHOLD = 0.13 RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: DPI_THRESHOLD = 298 @@ -237,15 +237,16 @@ class Eynollah: self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425" - self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" + #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/model_3_eraly_layout_no_patches_1_2_spaltige" - self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/eynollah-textline_light_20210425" + self.model_textline_dir = dir_models + "/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" if self.ocr: @@ -267,7 +268,7 @@ class Eynollah: self.model_textline = self.our_load_model(self.model_textline_dir) self.model_region = self.our_load_model(self.model_region_dir_p_ens_light) self.model_region_1_2 = self.our_load_model(self.model_region_dir_p_1_2_sp_np) - self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) + ###self.model_region_fl_new = self.our_load_model(self.model_region_dir_fully_new) self.model_region_fl_np = self.our_load_model(self.model_region_dir_fully_np) self.model_region_fl = self.our_load_model(self.model_region_dir_fully) self.model_reading_order_machine = self.our_load_model(self.model_reading_order_machine_dir) @@ -993,9 +994,16 @@ class Eynollah: img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - + + seg_not_base = label_p_pred[0,:,:,4] + + seg_not_base[seg_not_base>0.4] =1 + seg_not_base[seg_not_base<1] =0 seg = np.argmax(label_p_pred, axis=3)[0] + + seg[seg_not_base==1]=4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) @@ -1781,7 +1789,7 @@ class Eynollah: all_box_coord_per_process.append(crop_coor) queue_of_all_params.put([slopes_per_each_subprocess, textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours]) - def textline_contours(self, img, patches, 
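The do_prediction change above is a thresholded override on top of the argmax decision: wherever the probability of channel 4 (the "not base" class) clears 0.4, that class wins even if argmax preferred another label. The pattern in isolation:

import numpy as np

def argmax_with_class_bias(probs, cls=4, thresh=0.4):
    # probs: (H, W, C) softmax output for one image. Plain argmax tends to
    # under-detect the rare class, so confident pixels are forced to it.
    seg = np.argmax(probs, axis=-1)
    seg[probs[..., cls] > thresh] = cls
    return seg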
scaler_h, scaler_w): + def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) @@ -1792,10 +1800,34 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) #print(img.shape,'bin shape textline') if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, n_batch_inference=3) + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) + if num_col_classifier==1: + prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, n_batch_inference=3) + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) + if num_col_classifier==1: + prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) + + textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 + + old_art = np.copy(textline_mask_tot_ea_art) + + textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') + textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + + prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 + + textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 + textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') + textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) + + prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 + + prediction_textline[:,:][old_art[:,:]==1]=2 + if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) else: @@ -1855,49 +1887,58 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 1000 + img_w_new = 900#1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: - img_w_new = 1500 + img_w_new = 1300#1500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: - img_w_new = 2000 + img_w_new = 1600#2000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 4: - img_w_new = 2500 + img_w_new = 1900#2500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 5: - img_w_new = 3000 + img_w_new = 2300#3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 4000 + img_w_new = 3300#4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) t_bin = time.time() - if not self.dir_in: - model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - - #print("inside bin ", time.time()-t_bin) - 
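The new resize targets above shrink each column class by roughly 10 to 25 percent. Restated as a lookup, with the values copied from the hunk and six or more columns falling back to 3300:

# Height always keeps the aspect ratio of the original image
WIDTHS_BY_COLUMNS = {1: 900, 2: 1300, 3: 1600, 4: 1900, 5: 2300}

def target_size(img_shape, num_col_classifier):
    img_w_new = WIDTHS_BY_COLUMNS.get(num_col_classifier, 3300)
    img_h_new = int(img_shape[0] / float(img_shape[1]) * img_w_new)
    return img_h_new, img_w_new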
prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - prediction_bin = prediction_bin.astype(np.uint16) - #img= np.copy(prediction_bin) - img_bin = np.copy(prediction_bin) + #if (not self.input_binary) or self.full_layout: + #if self.input_binary: + #img_bin = np.copy(img_resized) + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if not self.dir_in: + model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + else: + prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + + #print("inside bin ", time.time()-t_bin) + prediction_bin=prediction_bin[:,:,0] + prediction_bin = (prediction_bin[:,:]==0)*1 + prediction_bin = prediction_bin*255 + + prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + prediction_bin = prediction_bin.astype(np.uint16) + #img= np.copy(prediction_bin) + img_bin = np.copy(prediction_bin) + else: + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) - textline_mask_tot_ea = self.run_textline(img_bin) + ###textline_mask_tot_ea = self.run_textline(img_bin) + textline_mask_tot_ea = self.run_textline(img_bin, num_col_classifier) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -1906,20 +1947,20 @@ class Eynollah: #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: - ###if num_col_classifier == 1 or num_col_classifier == 2: - ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) - ###else: - ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + if num_col_classifier == 1 or num_col_classifier == 2: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + else: + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - ##if num_col_classifier == 1 or num_col_classifier == 2: - ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) - ##else: - ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) - prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) + if num_col_classifier == 1 or 
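Here the commit earns its subject: the binarization model now runs only when the full-layout path or a dense page (three or more columns) needs it, and the resized image passes through unchanged otherwise. Condensed, with binarize standing in for the prediction calls shown above:

def maybe_binarize(img_resized, input_binary, full_layout, num_col_classifier, binarize):
    # Equivalent to the two-clause condition in the hunk:
    # (not input_binary and full_layout) or (not input_binary and num_col >= 3)
    if not input_binary and (full_layout or num_col_classifier >= 3):
        return binarize(img_resized)
    return img_resized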
num_col_classifier == 2: + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + else: + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) @@ -1937,7 +1978,7 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=3) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=2) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2899,10 +2940,11 @@ class Eynollah: #print("enhancement in ", time.time()-t_in) return img_res, is_image_enhanced, num_col_classifier, num_column_is_classified - def run_textline(self, image_page): - scaler_h_textline = 1 # 1.2#1.2 - scaler_w_textline = 1 # 0.9#1 - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline) + def run_textline(self, image_page, num_col_classifier=None): + scaler_h_textline = 1#1.3 # 1.2#1.2 + scaler_w_textline = 1#1.3 # 0.9#1 + #print(image_page.shape) + textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3147,6 +3189,17 @@ class Eynollah: ##regions_fully_only_drop = put_drop_out_from_only_drop_model(regions_fully_only_drop, text_regions_p) ##regions_fully[:, :, 0][regions_fully_only_drop[:, :, 0] == 4] = 4 drop_capital_label_in_full_layout_model = 3 + + drops = (regions_fully[:,:,0]==drop_capital_label_in_full_layout_model)*1 + + drops= drops.astype(np.uint8) + + regions_fully[:,:,0][regions_fully[:,:,0]==drop_capital_label_in_full_layout_model] = 1 + + drops = cv2.erode(drops[:,:], KERNEL, iterations=1) + regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model + + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: @@ -3695,7 +3748,7 @@ class Eynollah: """ self.logger.debug("enter run") - skip_layout_ro = True + skip_layout_ro = False#True t0_tot = time.time() diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index 929669f..8705ecf 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -792,11 +792,11 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.8: layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label else: - layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = drop_capital_label + layout_in_patch[y : y + h, x : x + w, 0][layout_in_patch[y : y + h, x : x + w, 0] == drop_capital_label] = 1#drop_capital_label return layout_in_patch From 1b18ae874b9ea086e99ac76281dd30572f947471 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 13 Sep 
2024 00:52:06 +0200 Subject: [PATCH 24/50] passing number of columns as an argument --- qurator/eynollah/cli.py | 14 +++++- qurator/eynollah/eynollah.py | 96 ++++++++++++++++++++++++++++-------- 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index b0f55cd..357582c 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -191,6 +191,16 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i is_flag=True, help="if this parameter set to true, this tool will try to do ocr", ) +@click.option( + "--num_col_upper", + "-ncu", + help="upper limit of columns in document image", +) +@click.option( + "--num_col_lower", + "-ncl", + help="lower limit of columns in document image", +) @click.option( "--log_level", "-l", help="Override log level globally to this", ) -def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, ignore_page_extraction, log_level): +def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, ignore_page_extraction, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -235,6 +245,8 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s ignore_page_extraction=ignore_page_extraction, reading_order_machine_based=reading_order_machine_based, do_ocr=do_ocr, + num_col_upper=num_col_upper, + num_col_lower=num_col_lower, ) if dir_in: eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 569aec5..f76dce8 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -178,6 +178,8 @@ class Eynollah: ignore_page_extraction=False, reading_order_machine_based=False, do_ocr=False, + num_col_upper=None, + num_col_lower=None, override_dpi=None, logger=None, pcgts=None, @@ -212,6 +214,14 @@ class Eynollah: self.headers_off = headers_off self.ignore_page_extraction = ignore_page_extraction self.ocr = do_ocr + if num_col_upper: + self.num_col_upper = int(num_col_upper) + else: + self.num_col_upper = num_col_upper + if num_col_lower: + self.num_col_lower = int(num_col_lower) + else: + self.num_col_lower = num_col_lower self.pcgts = pcgts if not dir_in: self.plotter = None if not enable_plotting else EynollahPlotter( @@ -597,36 +607,80 @@ class Eynollah: else: img = self.imread() img_bin = None - + + width_early = img.shape[1] t1 = time.time() _, page_coord = self.early_page_for_num_of_column_classification(img_bin) if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) - if self.input_binary: - img_in = np.copy(img) - width_early = img_in.shape[1] - img_in = img_in / 255.0 - img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = img_in.reshape(1, 448, 448, 3) - else: - img_1ch = self.imread(grayscale=True) - width_early = img_1ch.shape[1] - img_1ch = img_1ch[page_coord[0] : page_coord[1], 
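The option handling just added threads num_col_upper and num_col_lower through the CLI and the constructor; the branching that follows reduces to a clamp on the classifier output. One bound fixes the column count outright, two distinct bounds clip the estimate, and equal bounds pin it. Equivalent logic with an illustrative helper name:

import numpy as np

def resolve_num_columns(predicted, num_col_upper=None, num_col_lower=None):
    if num_col_upper and not num_col_lower:
        return num_col_upper
    if num_col_lower and not num_col_upper:
        return num_col_lower
    if num_col_upper and num_col_lower:
        if num_col_upper == num_col_lower:
            return num_col_upper
        return int(np.clip(predicted, num_col_lower, num_col_upper))
    # Neither bound given: trust the classifier
    return predicted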
page_coord[2] : page_coord[3]] + if self.num_col_upper and not self.num_col_lower: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + elif self.num_col_lower and not self.num_col_upper: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + + elif (not self.num_col_upper and not self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] - img_1ch = img_1ch / 255.0 - img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) - img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) - img_in[0, :, :, 0] = img_1ch[:, :] - img_in[0, :, :, 1] = img_1ch[:, :] - img_in[0, :, :, 2] = img_1ch[:, :] + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] - if self.dir_in: - label_p_pred = self.model_classifier.predict(img_in, verbose=0) + if self.dir_in: + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + else: + label_p_pred = model_num_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower): + if self.input_binary: + img_in = np.copy(img) + img_in = img_in / 255.0 + img_in = cv2.resize(img_in, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = img_in.reshape(1, 448, 448, 3) + else: + img_1ch = self.imread(grayscale=True) + width_early = img_1ch.shape[1] + img_1ch = img_1ch[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]] + + img_1ch = img_1ch / 255.0 + img_1ch = cv2.resize(img_1ch, (448, 448), interpolation=cv2.INTER_NEAREST) + img_in = np.zeros((1, img_1ch.shape[0], img_1ch.shape[1], 3)) + img_in[0, :, :, 0] = img_1ch[:, :] + img_in[0, :, :, 1] = img_1ch[:, :] + img_in[0, :, :, 2] = img_1ch[:, :] + + + if self.dir_in: + label_p_pred = self.model_classifier.predict(img_in, verbose=0) + else: + label_p_pred = model_num_classifier.predict(img_in, verbose=0) + num_col = np.argmax(label_p_pred[0]) + 1 + + if num_col > self.num_col_upper: + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + if num_col < self.num_col_lower: + num_col = self.num_col_lower + label_p_pred = [np.ones(6)] + else: - label_p_pred = model_num_classifier.predict(img_in, verbose=0) - num_col = np.argmax(label_p_pred[0]) + 1 + num_col = self.num_col_upper + label_p_pred = [np.ones(6)] + self.logger.info("Found %d columns (%s)", num_col, np.around(label_p_pred, decimals=5)) From 21380fc8706474f0c6c791560fb6a5174d03aa8e Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 17 Sep 2024 15:06:41 +0200 Subject: [PATCH 25/50] scaling contours without dilation --- qurator/eynollah/eynollah.py | 207 +++++++++++++++++++++++++++++++---- 1 file changed, 184 insertions(+), 23 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index f76dce8..79cf98b 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -256,7 +256,7 @@ class Eynollah: ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = 
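The column-count branching added in PATCH 24 above reduces to one small decision rule: take a user bound directly when only one side is given, skip the classifier entirely when both bounds agree, and otherwise clamp the classifier's answer into the requested range. A standalone sketch of that rule, where resolve_num_col and predict_fn are hypothetical stand-ins for the class method and the Keras column classifier:

import numpy as np

def resolve_num_col(predict_fn, img_in, num_col_upper=None, num_col_lower=None):
    # Only one bound given: trust it outright, no classifier call needed.
    if num_col_upper and not num_col_lower:
        return num_col_upper, [np.ones(6)]
    if num_col_lower and not num_col_upper:
        return num_col_lower, [np.ones(6)]
    if num_col_upper and num_col_lower:
        if num_col_upper == num_col_lower:
            # Bounds agree: the answer is fixed.
            return num_col_upper, [np.ones(6)]
        # Bounds differ: ask the classifier, then clamp its answer.
        label_p_pred = predict_fn(img_in)
        num_col = int(np.argmax(label_p_pred[0])) + 1
        num_col = min(max(num_col, num_col_lower), num_col_upper)
        return num_col, label_p_pred
    # No bounds at all: the 6-way classifier decides alone.
    label_p_pred = predict_fn(img_in)
    return int(np.argmax(label_p_pred[0])) + 1, label_p_pred

fake = lambda _: [np.array([0.1, 0.7, 0.1, 0.05, 0.03, 0.02])]  # "2 columns"
print(resolve_num_col(fake, None)[0])                                    # 2
print(resolve_num_col(fake, None, num_col_upper=4, num_col_lower=3)[0])  # clamped to 3
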
dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: self.model_textline_dir = dir_models + "/eynollah-textline_20210425" if self.ocr: @@ -796,7 +796,7 @@ class Eynollah: return model, None - def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False): + def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 1].output_shape[1] @@ -903,6 +903,13 @@ class Eynollah: seg[seg_not_base==1]=4 seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -977,6 +984,14 @@ class Eynollah: seg[seg_not_base==1]=4 seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 indexer_inside_batch = 0 for i_batch, j_batch in zip(list_i_s, list_j_s): @@ -1845,42 +1860,50 @@ class Eynollah: def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') + thresholding_for_artificial_class_in_light_version = True#False if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) - img = img.astype(np.uint8) + #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] img_w = img_org.shape[1] img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) - #print(img.shape,'bin shape textline') + if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) - if num_col_classifier==1: - prediction_textline_nopatch = self.do_prediction(False, img, model_textline) - prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3) - if num_col_classifier==1: - prediction_textline_nopatch = 
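The thresholding_for_artificial_class_in_light_version branch added above sidesteps the plain argmax: the "artificial" separator channel is re-thresholded at a deliberately low 0.2 and stamped back over the label map, so thin boundary pixels survive even where some other class wins the per-pixel vote. A toy NumPy sketch of that post-step (the function name is hypothetical; channel 2 and the 0.2 cut-off are the values used above):

import numpy as np

def stamp_artificial_class(label_p_pred, threshold=0.2, art_channel=2):
    # label_p_pred: (batch, h, w, n_classes) softmax scores.
    seg = np.argmax(label_p_pred, axis=3)            # plain per-pixel winner
    seg_art = label_p_pred[:, :, :, art_channel].copy()
    seg_art[seg_art < threshold] = 0                 # drop weak responses
    seg_art[seg_art > 0] = 1                         # binarize the rest
    seg[seg_art == 1] = art_channel                  # override the argmax
    return seg

scores = np.zeros((1, 2, 2, 4))
scores[0, :, :, 0] = 0.6        # background wins the argmax everywhere
scores[0, 0, 0, 2] = 0.3        # weak artificial response, above the cut-off
print(stamp_artificial_class(scores)[0])   # [[2 0] [0 0]]
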
self.do_prediction(False, img, model_textline) - prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + #if not thresholding_for_artificial_class_in_light_version: + #if num_col_classifier==1: + #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) + #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 prediction_textline = resize_image(prediction_textline, img_h, img_w) textline_mask_tot_ea_art = (prediction_textline[:,:]==2)*1 old_art = np.copy(textline_mask_tot_ea_art) - textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') - textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) - - prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 + if not thresholding_for_artificial_class_in_light_version: + textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') + textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + + prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 textline_mask_tot_ea_lines = (prediction_textline[:,:]==1)*1 textline_mask_tot_ea_lines = textline_mask_tot_ea_lines.astype('uint8') - textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) + + if not thresholding_for_artificial_class_in_light_version: + textline_mask_tot_ea_lines = cv2.dilate(textline_mask_tot_ea_lines, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_lines[:,:]==1]=1 - prediction_textline[:,:][old_art[:,:]==1]=2 + if not thresholding_for_artificial_class_in_light_version: + prediction_textline[:,:][old_art[:,:]==1]=2 if not self.dir_in: prediction_textline_longshot = self.do_prediction(False, img, model_textline) @@ -1959,7 +1982,7 @@ class Eynollah: img_w_new = 2300#3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 3300#4000 + img_w_new = 3000#4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) @@ -1968,7 +1991,7 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout):# or (not self.input_binary and num_col_classifier >= 3): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) @@ -3794,15 +3817,146 @@ class Eynollah: return textline_contour def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - + + def scale_contours(self,all_found_textline_polygons): + for i in range(len(all_found_textline_polygons[0])): + con_ind = all_found_textline_polygons[0][i] + x_min = np.min( con_ind[:,0,0] ) + y_min = np.min( con_ind[:,0,1] ) + + x_max = np.max( con_ind[:,0,0] ) + y_max = np.max( con_ind[:,0,1] ) + + x_mean = np.mean( con_ind[:,0,0] ) + y_mean = np.mean( con_ind[:,0,1] ) + + arg_y_max = np.argmax( con_ind[:,0,1] ) + arg_y_min = np.argmin( con_ind[:,0,1] ) + + x_cor_y_max = 
con_ind[arg_y_max,0,0] + x_cor_y_min = con_ind[arg_y_min,0,0] + + m_con = (y_max - y_min) / float(x_cor_y_max - x_cor_y_min) + + con_scaled = con_ind*1 + + con_scaled = con_scaled.astype(np.float) + + con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) + con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) + + + if (x_max - x_min) > (y_max - y_min): + + if (y_max-y_min)<=15: + con_scaled[:,0,1] = con_ind[:,0,1]*1.8 + + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.8*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + elif (y_max-y_min)<=30 and (y_max-y_min)>15: + con_scaled[:,0,1] = con_ind[:,0,1]*1.6 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.6*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + elif (y_max-y_min)>30 and (y_max-y_min)<100: + con_scaled[:,0,1] = con_ind[:,0,1]*1.35 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.35*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + else: + con_scaled[:,0,1] = con_ind[:,0,1]*1.2 + y_max_scaled = np.max(con_scaled[:,0,1]) + y_min_scaled = np.min(con_scaled[:,0,1]) + + y_max_expected = ( m_con*1.2*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) + con_scaled[:,0,0] = con_ind[:,0,0]*1.03 + + + + if y_max_expected<=y_max_scaled: + con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled + + con_scaled[:,0,1] = con_scaled[:,0,1]*(y_max_expected - y_min_scaled)/ (y_max_scaled - y_min_scaled) + con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled + + else: + + if (x_max-x_min)<=15: + con_scaled[:,0,0] = con_ind[:,0,0]*1.8 + elif (x_max-x_min)<=30 and (x_max-x_min)>15: + con_scaled[:,0,0] = con_ind[:,0,0]*1.6 + elif (x_max-x_min)>30 and (x_max-x_min)<100: + con_scaled[:,0,0] = con_ind[:,0,0]*1.35 + else: + con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + + + x_min_n = np.min( con_scaled[:,0,0] ) + y_min_n = np.min( con_scaled[:,0,1] ) + + x_mean_n = np.mean( con_scaled[:,0,0] ) + y_mean_n = np.mean( con_scaled[:,0,1] ) + + ##diff_x = (x_min_n - x_min)*1 + ##diff_y = (y_min_n - y_min)*1 + + diff_x = (x_mean_n - x_mean)*1 + diff_y = (y_mean_n - y_mean)*1 + + + con_scaled[:,0,0] = (con_scaled[:,0,0] - diff_x) + con_scaled[:,0,1] = (con_scaled[:,0,1] - diff_y) + + x_max_n = np.max( con_scaled[:,0,0] ) + y_max_n = np.max( con_scaled[:,0,1] ) + + diff_disp_x = (x_max_n - x_max) / 2. + diff_disp_y = (y_max_n - y_max) / 2. 
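The translate/scale/re-centre arithmetic in scale_contours is easier to read in isolation: move the contour to its centroid, stretch it anisotropically, move it back, and clip at the image border, so the polygon grows symmetrically instead of drifting away from the origin as a bare multiply would. A condensed sketch with fixed factors standing in for the height-dependent 1.8/1.6/1.35/1.2 ladder above (helper name hypothetical):

import numpy as np

def scale_contour_about_centroid(con_ind, fx=1.03, fy=1.2):
    # con_ind: OpenCV-style contour (n, 1, 2), x in [:,0,0], y in [:,0,1].
    con = con_ind.astype(float)
    cx, cy = con[:, 0, 0].mean(), con[:, 0, 1].mean()
    con[:, 0, 0] = (con[:, 0, 0] - cx) * fx + cx   # mild horizontal growth
    con[:, 0, 1] = (con[:, 0, 1] - cy) * fy + cy   # stronger vertical growth
    con[con < 0] = 0                               # stay inside the image
    return con.astype(np.int32)

square = np.array([[[10, 10]], [[30, 10]], [[30, 20]], [[10, 20]]])
print(scale_contour_about_centroid(square)[:, 0, 1])   # [ 9  9 21 21]
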
+ + x_vals = np.array( np.abs(con_scaled[:,0,0] - diff_disp_x) ).astype(np.int16) + y_vals = np.array( np.abs(con_scaled[:,0,1] - diff_disp_y) ).astype(np.int16) + all_found_textline_polygons[0][i][:,0,0] = x_vals[:] + all_found_textline_polygons[0][i][:,0,1] = y_vals[:] + return all_found_textline_polygons + + def scale_contours_new(self, textline_mask_tot_ea): + + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) + all_found_textline_polygons1 = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + + textline_mask_tot_ea_res = resize_image(textline_mask_tot_ea, int( textline_mask_tot_ea.shape[0]*1.6), textline_mask_tot_ea.shape[1]) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea_res) + ##all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) + + for i in range(len(all_found_textline_polygons)): + + #x_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,0] ) + y_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,1] ) + + #x_mean = np.mean( all_found_textline_polygons[i][:,0,0] ) + y_mean = np.mean( all_found_textline_polygons[i][:,0,1] ) + + ydiff = y_mean - y_mean_1 + + all_found_textline_polygons[i][:,0,1] = all_found_textline_polygons[i][:,0,1] - ydiff + return all_found_textline_polygons + + def run(self): """ Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - skip_layout_ro = False#True + skip_layout_ro = True t0_tot = time.time() @@ -3820,7 +3974,6 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if not skip_layout_ro: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) @@ -4032,6 +4185,7 @@ class Eynollah: if self.textline_light: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + else: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) @@ -4212,10 +4366,17 @@ class Eynollah: page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = 
self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) + + ##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea) + cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) all_found_textline_polygons=[ all_found_textline_polygons ] + + all_found_textline_polygons = self.scale_contours(all_found_textline_polygons) + + order_text_new = [0] slopes =[0] id_of_texts_tot =['region_0001'] From a1f1f98de3ad7500c80bb5d183fc86aa66e031e5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 18 Sep 2024 00:08:54 +0200 Subject: [PATCH 26/50] updating scaling contours --- qurator/eynollah/eynollah.py | 82 ++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 79cf98b..bbfba0f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3821,23 +3821,51 @@ class Eynollah: def scale_contours(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] - x_min = np.min( con_ind[:,0,0] ) - y_min = np.min( con_ind[:,0,1] ) - x_max = np.max( con_ind[:,0,0] ) - y_max = np.max( con_ind[:,0,1] ) + con_ind = con_ind.astype(np.float) + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) - x_mean = np.mean( con_ind[:,0,0] ) - y_mean = np.mean( con_ind[:,0,1] ) + + m_arr = y_differential / x_differential + + #print(x_differential, 'x_differential') + + #print(y_differential, 'y_differential') + + #print(m_arr) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_mean = float(np.mean( con_ind[:,0,0] )) + y_mean = float(np.mean( con_ind[:,0,1] )) arg_y_max = np.argmax( con_ind[:,0,1] ) arg_y_min = np.argmin( con_ind[:,0,1] ) - x_cor_y_max = con_ind[arg_y_max,0,0] - x_cor_y_min = con_ind[arg_y_min,0,0] - m_con = (y_max - y_min) / float(x_cor_y_max - x_cor_y_min) + arg_x_max = np.argmax( con_ind[:,0,0] ) + arg_x_min = np.argmin( con_ind[:,0,0] ) + + x_cor_y_max = float(con_ind[arg_y_max,0,0]) + x_cor_y_min = float(con_ind[arg_y_min,0,0]) + + y_cor_x_max = float(con_ind[arg_x_max,0,1]) + y_cor_x_min = float(con_ind[arg_x_min,0,1]) + + if (x_cor_y_max - x_cor_y_min) != 0: + m_con = (y_max - y_min) / (x_cor_y_max - x_cor_y_min) + else: + m_con= None + + + m_con_x = (x_max - x_min) / (y_cor_x_max - y_cor_x_min) + #print(m_con,m_con_x, 'm_con') con_scaled = con_ind*1 con_scaled = con_scaled.astype(np.float) @@ -3845,7 +3873,6 @@ class Eynollah: con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) - if (x_max - x_min) > (y_max - y_min): if (y_max-y_min)<=15: @@ -3877,7 +3904,7 @@ class Eynollah: - + #print(m_con, (x_cor_y_max-x_cor_y_min),y_min_scaled, y_max_expected, y_max_scaled, "y_max_scaled") if y_max_expected<=y_max_scaled: con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled @@ -3885,17 +3912,48 @@ class Eynollah: con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled else: - + #print(x_max-x_min, m_con_x,'m_con_x') if (x_max-x_min)<=15: con_scaled[:,0,0] = con_ind[:,0,0]*1.8 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = 
( m_con_x*1.8*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + elif (x_max-x_min)<=30 and (x_max-x_min)>15: con_scaled[:,0,0] = con_ind[:,0,0]*1.6 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.6*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + elif (x_max-x_min)>30 and (x_max-x_min)<100: con_scaled[:,0,0] = con_ind[:,0,0]*1.35 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.35*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + else: con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + + x_max_scaled = np.max(con_scaled[:,0,0]) + x_min_scaled = np.min(con_scaled[:,0,0]) + + x_max_expected = ( m_con_x*1.2*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) + con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + #print(x_max_expected, x_max_scaled, "x_max_scaled") + if x_max_expected<=x_max_scaled: + con_scaled[:,0,0] = con_scaled[:,0,0] - x_min_scaled + + con_scaled[:,0,0] = con_scaled[:,0,0]*(x_max_expected - x_min_scaled)/ (x_max_scaled - x_min_scaled) + con_scaled[:,0,0] = con_scaled[:,0,0] + x_min_scaled + x_min_n = np.min( con_scaled[:,0,0] ) y_min_n = np.min( con_scaled[:,0,1] ) From 5a07cd9cfa9713e8944195fff6416ed6e639c121 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 19 Sep 2024 16:21:55 +0200 Subject: [PATCH 27/50] the most effective version of contours dilation without opencv and all at once --- qurator/eynollah/eynollah.py | 258 +++++++++++++---------------------- 1 file changed, 97 insertions(+), 161 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index bbfba0f..cb70107 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1964,7 +1964,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 900#1000 + img_w_new = 800#1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -3818,196 +3818,132 @@ class Eynollah: def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] - def scale_contours(self,all_found_textline_polygons): + def dilate_textlines(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] con_ind = con_ind.astype(np.float) + x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - - m_arr = y_differential / x_differential - - #print(x_differential, 'x_differential') - - #print(y_differential, 'y_differential') - - #print(m_arr) - x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) x_max = float(np.max( con_ind[:,0,0] )) y_max = float(np.max( con_ind[:,0,1] )) + - x_mean = float(np.mean( con_ind[:,0,0] )) - y_mean = float(np.mean( con_ind[:,0,1] )) - - arg_y_max = np.argmax( con_ind[:,0,1] ) - arg_y_min = np.argmin( con_ind[:,0,1] ) - - - arg_x_max = np.argmax( con_ind[:,0,0] ) - arg_x_min = np.argmin( con_ind[:,0,0] ) - - x_cor_y_max = float(con_ind[arg_y_max,0,0]) - x_cor_y_min = float(con_ind[arg_y_min,0,0]) - - - y_cor_x_max = float(con_ind[arg_x_max,0,1]) - y_cor_x_min = float(con_ind[arg_x_min,0,1]) - - if (x_cor_y_max - x_cor_y_min) != 0: - m_con = (y_max - y_min) / (x_cor_y_max - x_cor_y_min) - else: - m_con= None - - - m_con_x = (x_max - x_min) / (y_cor_x_max - y_cor_x_min) - #print(m_con,m_con_x, 'm_con') - con_scaled = con_ind*1 - - con_scaled = 
con_scaled.astype(np.float) - - con_scaled[:,0,0] = con_scaled[:,0,0] - int(x_mean) - con_scaled[:,0,1] = con_scaled[:,0,1] - int(y_mean) - - if (x_max - x_min) > (y_max - y_min): - - if (y_max-y_min)<=15: - con_scaled[:,0,1] = con_ind[:,0,1]*1.8 - - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + + x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + + mult = x_biger_than_x*x_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if y_differential[0]==0: + y_differential[0] = 0.1 + + if y_differential[-1]==0: + y_differential[-1]= 0.1 - y_max_expected = ( m_con*1.8*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - elif (y_max-y_min)<=30 and (y_max-y_min)>15: - con_scaled[:,0,1] = con_ind[:,0,1]*1.6 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - y_max_expected = ( m_con*1.6*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - elif (y_max-y_min)>30 and (y_max-y_min)<100: - con_scaled[:,0,1] = con_ind[:,0,1]*1.35 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - y_max_expected = ( m_con*1.35*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - else: - con_scaled[:,0,1] = con_ind[:,0,1]*1.2 - y_max_scaled = np.max(con_scaled[:,0,1]) - y_min_scaled = np.min(con_scaled[:,0,1]) - - y_max_expected = ( m_con*1.2*(x_cor_y_max-x_cor_y_min) + y_min_scaled ) - con_scaled[:,0,0] = con_ind[:,0,0]*1.03 + y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. for ind in range(len(y_differential)) ] - - #print(m_con, (x_cor_y_max-x_cor_y_min),y_min_scaled, y_max_expected, y_max_scaled, "y_max_scaled") - if y_max_expected<=y_max_scaled: - con_scaled[:,0,1] = con_scaled[:,0,1] - y_min_scaled + if y_differential[0]==0.1: + y_differential[0] = y_differential[1] + if y_differential[-1]==0.1: + y_differential[-1] = y_differential[-2] - con_scaled[:,0,1] = con_scaled[:,0,1]*(y_max_expected - y_min_scaled)/ (y_max_scaled - y_min_scaled) - con_scaled[:,0,1] = con_scaled[:,0,1] + y_min_scaled + y_differential.append(y_differential[0]) + + y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + + y_differential = np.array(y_differential) + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential + + con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 + con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 + + try: + con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 + con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 + con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 + + try: + con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 + con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + except: + pass + + else: - #print(x_max-x_min, m_con_x,'m_con_x') - if (x_max-x_min)<=15: - con_scaled[:,0,0] = con_ind[:,0,0]*1.8 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.8*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - - elif (x_max-x_min)<=30 and (x_max-x_min)>15: - con_scaled[:,0,0] = con_ind[:,0,0]*1.6 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.6*(y_cor_x_max-y_cor_x_min) + 
x_min_scaled ) - - elif (x_max-x_min)>30 and (x_max-x_min)<100: - con_scaled[:,0,0] = con_ind[:,0,0]*1.35 - - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - - x_max_expected = ( m_con_x*1.35*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - - else: - con_scaled[:,0,0] = con_ind[:,0,0]*1.2 + + y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) + + mult = y_biger_than_x*y_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if x_differential[0]==0: + x_differential[0] = 0.1 + + if x_differential[-1]==0: + x_differential[-1]= 0.1 - x_max_scaled = np.max(con_scaled[:,0,0]) - x_min_scaled = np.min(con_scaled[:,0,0]) - x_max_expected = ( m_con_x*1.2*(y_cor_x_max-y_cor_x_min) + x_min_scaled ) - con_scaled[:,0,1] = con_ind[:,0,1]*1.03 + x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. for ind in range(len(x_differential)) ] - #print(x_max_expected, x_max_scaled, "x_max_scaled") - if x_max_expected<=x_max_scaled: - con_scaled[:,0,0] = con_scaled[:,0,0] - x_min_scaled + + if x_differential[0]==0.1: + x_differential[0] = x_differential[1] + if x_differential[-1]==0.1: + x_differential[-1] = x_differential[-2] - con_scaled[:,0,0] = con_scaled[:,0,0]*(x_max_expected - x_min_scaled)/ (x_max_scaled - x_min_scaled) - con_scaled[:,0,0] = con_scaled[:,0,0] + x_min_scaled + x_differential.append(x_differential[0]) + + x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + + x_differential = np.array(x_differential) + + con_scaled = con_ind*1 + + con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential + + con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 + con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + + con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 + con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + + con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 + con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + + con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 + con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 - - x_min_n = np.min( con_scaled[:,0,0] ) - y_min_n = np.min( con_scaled[:,0,1] ) - - x_mean_n = np.mean( con_scaled[:,0,0] ) - y_mean_n = np.mean( con_scaled[:,0,1] ) - - ##diff_x = (x_min_n - x_min)*1 - ##diff_y = (y_min_n - y_min)*1 - - diff_x = (x_mean_n - x_mean)*1 - diff_y = (y_mean_n - y_mean)*1 - - - con_scaled[:,0,0] = (con_scaled[:,0,0] - diff_x) - con_scaled[:,0,1] = (con_scaled[:,0,1] - diff_y) - - x_max_n = np.max( con_scaled[:,0,0] ) - y_max_n = np.max( con_scaled[:,0,1] ) - - diff_disp_x = (x_max_n - x_max) / 2. - diff_disp_y = (y_max_n - y_max) / 2. 
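Stripped of its zero-handling and corner nudges, the new dilate_textlines rests on one observation: along a closed trace, the two long sides of a flat polygon are walked in opposite x directions, so the sign of dx tells which side a vertex lies on, and a fixed y offset applied against that sign fattens both sides in a single vectorized pass, with no raster round-trip through cv2.dilate. A minimal sketch for a horizontal line (the helper name and the trace orientation are assumptions of the toy):

import numpy as np

def fatten_line_polygon(con_ind, pad=8):
    # con_ind: dense closed contour (n, 1, 2) of a wide, flat shape.
    con = con_ind.astype(float)
    dx = np.diff(con[:, 0, 0])
    signs = np.where(dx < 0, -1.0, 1.0)    # which long side is this edge on?
    signs = np.append(signs, signs[0])     # last vertex takes the wrap edge
    con[:, 0, 1] += pad * signs            # opposite shifts for the two sides
    con[con < 0] = 0
    return con.astype(np.int32)

# dense outline: top edge traced right-to-left at y=50, bottom left-to-right at y=60
top = [[[x, 50]] for x in range(100, -1, -1)]
bottom = [[[x, 60]] for x in range(0, 101)]
out = fatten_line_polygon(np.array(top + bottom))
print(out[:, 0, 1].min(), out[:, 0, 1].max())   # 42 68: both long sides pushed outward
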
- - x_vals = np.array( np.abs(con_scaled[:,0,0] - diff_disp_x) ).astype(np.int16) - y_vals = np.array( np.abs(con_scaled[:,0,1] - diff_disp_y) ).astype(np.int16) - all_found_textline_polygons[0][i][:,0,0] = x_vals[:] - all_found_textline_polygons[0][i][:,0,1] = y_vals[:] - return all_found_textline_polygons - - def scale_contours_new(self, textline_mask_tot_ea): - - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea) - all_found_textline_polygons1 = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - - textline_mask_tot_ea_res = resize_image(textline_mask_tot_ea, int( textline_mask_tot_ea.shape[0]*1.6), textline_mask_tot_ea.shape[1]) - cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea_res) - ##all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea_res, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) - - for i in range(len(all_found_textline_polygons)): - - #x_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,0] ) - y_mean_1 = np.mean( all_found_textline_polygons1[i][:,0,1] ) - #x_mean = np.mean( all_found_textline_polygons[i][:,0,0] ) - y_mean = np.mean( all_found_textline_polygons[i][:,0,1] ) + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - ydiff = y_mean - y_mean_1 + all_found_textline_polygons[0][i][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[0][i][:,0,0] = con_scaled[:,0, 0] - all_found_textline_polygons[i][:,0,1] = all_found_textline_polygons[i][:,0,1] - ydiff return all_found_textline_polygons - - def run(self): """ Get image and scales, then extract the page of scanned image @@ -4432,7 +4368,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.scale_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) order_text_new = [0] From 2d18739d9b267a14dfe0934b02772940976a8e72 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Sep 2024 15:08:09 +0200 Subject: [PATCH 28/50] postprocessing of textline contour dilation + skip layout and reading order passed as an argument --- qurator/eynollah/cli.py | 9 +++++++- qurator/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 357582c..b293403 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -201,6 +201,12 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i "-ncl", help="upper limit of columns in document image", ) +@click.option( + "--skip_layout_and_reading_order", + "-slro/-noslro", + is_flag=True, + help="if this parameter set to true, this tool will ignore layout detection and reading order. 
It means that textline detection will be done within printspace and contours of textline will be written in xml output file.", +) @click.option( "--log_level", "-l", @@ -208,7 +214,7 @@ def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, i help="Override log level globally to this", ) -def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, ignore_page_extraction, log_level): +def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, save_all, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, num_col_upper, num_col_lower, skip_layout_and_reading_order, ignore_page_extraction, log_level): if log_level: setOverrideLogLevel(log_level) initLogging() @@ -247,6 +253,7 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s do_ocr=do_ocr, num_col_upper=num_col_upper, num_col_lower=num_col_lower, + skip_layout_and_reading_order=skip_layout_and_reading_order, ) if dir_in: eynollah.run() diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index cb70107..0619ef0 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -180,6 +180,7 @@ class Eynollah: do_ocr=False, num_col_upper=None, num_col_lower=None, + skip_layout_and_reading_order = False, override_dpi=None, logger=None, pcgts=None, @@ -213,6 +214,7 @@ class Eynollah: self.allow_scaling = allow_scaling self.headers_off = headers_off self.ignore_page_extraction = ignore_page_extraction + self.skip_layout_and_reading_order = skip_layout_and_reading_order self.ocr = do_ocr if num_col_upper: self.num_col_upper = int(num_col_upper) @@ -1951,7 +1953,7 @@ class Eynollah: q.put(slopes_sub) poly.put(poly_sub) box_sub.put(boxes_sub_new) - def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False): + def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=False): self.logger.debug("enter get_regions_light_v") t_in = time.time() erosion_hurts = False @@ -2019,7 +2021,7 @@ class Eynollah: textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) - if not skip_layout_ro: + if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) #print(img_resized.shape, num_col_classifier, "num_col_classifier") @@ -3818,6 +3820,30 @@ class Eynollah: def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes): return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] + def return_it_in_two_groups(self,x_differential): + split = [ind if x_differential[ind]!=x_differential[ind+1] else -1 for ind in range(len(x_differential)-1)] + + split_masked = list( np.array(split[:])[np.array(split[:])!=-1] ) + + if 0 not in split_masked: + split_masked.insert(0, -1) + + split_masked.append(len(x_differential)-1) + + split_masked = np.array(split_masked) +1 + + sums = [np.sum(x_differential[split_masked[ind]:split_masked[ind+1]]) for ind in range(len(split_masked)-1)] + + indexes_to_bec_changed = [ind if ( np.abs(sums[ind-1]) > np.abs(sums[ind]) and np.abs(sums[ind+1]) > np.abs(sums[ind])) else -1 
for ind in range(1,len(sums)-1) ] + + indexes_to_bec_changed_filtered = np.array(indexes_to_bec_changed)[np.array(indexes_to_bec_changed)!=-1] + + x_differential_new = np.copy(x_differential) + for i in indexes_to_bec_changed_filtered: + x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] + + return x_differential_new + def dilate_textlines(self,all_found_textline_polygons): for i in range(len(all_found_textline_polygons[0])): con_ind = all_found_textline_polygons[0][i] @@ -3863,6 +3889,8 @@ class Eynollah: y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + y_differential = self.return_it_in_two_groups(y_differential) + y_differential = np.array(y_differential) @@ -3890,7 +3918,6 @@ class Eynollah: else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) mult = y_biger_than_x*y_differential @@ -3918,8 +3945,10 @@ class Eynollah: x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + x_differential = self.return_it_in_two_groups(x_differential) x_differential = np.array(x_differential) + con_scaled = con_ind*1 con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential @@ -3949,8 +3978,6 @@ class Eynollah: Get image and scales, then extract the page of scanned image """ self.logger.debug("enter run") - - skip_layout_ro = True t0_tot = time.time() @@ -3968,7 +3995,7 @@ class Eynollah: self.logger.info("Enhancing took %.1fs ", time.time() - t0) #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() - if not skip_layout_ro: + if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) #print("text region early -2 in %.1fs", time.time() - t0) @@ -4356,7 +4383,7 @@ class Eynollah: return pcgts #print("text region early 7 in %.1fs", time.time() - t0) else: - _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro) + _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light) From b9e8959c4aefb0b9d24efb99abc309d7d350163c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 20 Sep 2024 16:33:13 +0200 Subject: [PATCH 29/50] update of light versions --- qurator/eynollah/eynollah.py | 238 ++++++++++++++++++----------------- 1 file changed, 126 insertions(+), 112 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0619ef0..c7407e2 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1862,7 +1862,10 @@ class Eynollah: def textline_contours(self, img, patches, scaler_h, scaler_w, num_col_classifier=None): self.logger.debug('enter textline_contours') - thresholding_for_artificial_class_in_light_version = True#False + if self.textline_light: + thresholding_for_artificial_class_in_light_version = True#False + else: + thresholding_for_artificial_class_in_light_version = False if not self.dir_in: model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else 
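return_it_in_two_groups above is a run-length cleanup for exactly those -1/+1 direction arrays: a short run of flipped signs squeezed between two heavier runs is almost certainly contour noise, so it is inverted to match its neighbours and the offsets stay on a consistent side. The same idea in a compact standalone form (hypothetical name; NumPy run splitting in place of the list comprehensions above):

import numpy as np

def smooth_sign_runs(signs):
    signs = np.asarray(signs, dtype=float)
    starts = np.flatnonzero(np.diff(signs) != 0) + 1       # run boundaries
    bounds = np.concatenate(([0], starts, [len(signs)]))
    sums = [signs[a:b].sum() for a, b in zip(bounds[:-1], bounds[1:])]
    out = signs.copy()
    for k in range(1, len(sums) - 1):
        # a run strictly weaker than both neighbours is treated as a glitch
        if abs(sums[k]) < abs(sums[k - 1]) and abs(sums[k]) < abs(sums[k + 1]):
            out[bounds[k]:bounds[k + 1]] *= -1
    return out

print(smooth_sign_runs([1, 1, 1, -1, 1, 1, 1]))   # the lone -1 is flipped: all ones
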
self.model_textline_dir_np) #img = img.astype(np.uint8) @@ -2016,7 +2019,7 @@ class Eynollah: #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) - textline_mask_tot_ea = self.run_textline(img_bin, num_col_classifier) + textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) @@ -2057,7 +2060,8 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=2) + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2097,6 +2101,7 @@ class Eynollah: polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) text_regions_p_true = np.zeros(prediction_regions_org.shape) @@ -3845,132 +3850,139 @@ class Eynollah: return x_differential_new def dilate_textlines(self,all_found_textline_polygons): - for i in range(len(all_found_textline_polygons[0])): - con_ind = all_found_textline_polygons[0][i] - - con_ind = con_ind.astype(np.float) - - x_differential = np.diff( con_ind[:,0,0]) - y_differential = np.diff( con_ind[:,0,1]) - - x_min = float(np.min( con_ind[:,0,0] )) - y_min = float(np.min( con_ind[:,0,1] )) - - x_max = float(np.max( con_ind[:,0,0] )) - y_max = float(np.max( con_ind[:,0,1] )) - - - if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + for j in range(len(all_found_textline_polygons)): + for i in range(len(all_found_textline_polygons[j])): + con_ind = all_found_textline_polygons[j][i] - x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + con_ind = con_ind.astype(np.float) - mult = x_biger_than_x*x_differential + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) - if y_differential[0]==0: - y_differential[0] = 0.1 + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + - if y_differential[-1]==0: - y_differential[-1]= 0.1 + if (y_max - y_min) > (x_max - x_min) and (x_max - x_min)<70: + x_biger_than_x = np.abs(x_differential) > np.abs(y_differential) + mult = x_biger_than_x*x_differential - y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. 
for ind in range(len(y_differential)) ] - - - if y_differential[0]==0.1: - y_differential[0] = y_differential[1] - if y_differential[-1]==0.1: - y_differential[-1] = y_differential[-2] + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) - y_differential.append(y_differential[0]) - - y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] - - y_differential = self.return_it_in_two_groups(y_differential) - - y_differential = np.array(y_differential) - - - con_scaled = con_ind*1 - - con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential - - con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 - con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 - - try: - con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 - con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 - except: - pass - - con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 - con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 - - try: - con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 - con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 - except: - pass - - - else: - y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) - - mult = y_biger_than_x*y_differential - - arg_min_mult = np.argmin(mult) - arg_max_mult = np.argmax(mult) - - if x_differential[0]==0: - x_differential[0] = 0.1 - - if x_differential[-1]==0: - x_differential[-1]= 0.1 + if y_differential[0]==0: + y_differential[0] = 0.1 + if y_differential[-1]==0: + y_differential[-1]= 0.1 + + + + y_differential = [y_differential[ind] if y_differential[ind]!=0 else (y_differential[ind-1] + y_differential[ind+1])/2. for ind in range(len(y_differential)) ] - x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] - - - if x_differential[0]==0.1: - x_differential[0] = x_differential[1] - if x_differential[-1]==0.1: - x_differential[-1] = x_differential[-2] + if y_differential[0]==0.1: + y_differential[0] = y_differential[1] + if y_differential[-1]==0.1: + y_differential[-1] = y_differential[-2] + + y_differential.append(y_differential[0]) - x_differential.append(x_differential[0]) - - x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] - - x_differential = self.return_it_in_two_groups(x_differential) - x_differential = np.array(x_differential) - - - con_scaled = con_ind*1 - - con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential - - con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 - con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + y_differential = [-1 if y_differential[ind]<0 else 1 for ind in range(len(y_differential))] + + y_differential = self.return_it_in_two_groups(y_differential) + + y_differential = np.array(y_differential) + + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] - 8*y_differential + + con_scaled[arg_min_mult,0, 1] = con_ind[arg_min_mult,0,1] + 8 + con_scaled[arg_min_mult+1,0, 1] = con_ind[arg_min_mult+1,0,1] + 8 + + try: + con_scaled[arg_min_mult-1,0, 1] = con_ind[arg_min_mult-1,0,1] + 5 + con_scaled[arg_min_mult+2,0, 1] = con_ind[arg_min_mult+2,0,1] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 1] = con_ind[arg_max_mult,0,1] - 8 + con_scaled[arg_max_mult+1,0, 1] = con_ind[arg_max_mult+1,0,1] - 8 + + try: + con_scaled[arg_max_mult-1,0, 1] = con_ind[arg_max_mult-1,0,1] - 5 + con_scaled[arg_max_mult+2,0, 1] = con_ind[arg_max_mult+2,0,1] - 5 + except: + pass - con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 - con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 - con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 - con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + else: + y_biger_than_x = np.abs(y_differential) > np.abs(x_differential) + + mult = y_biger_than_x*y_differential + + arg_min_mult = np.argmin(mult) + arg_max_mult = np.argmax(mult) + + if x_differential[0]==0: + x_differential[0] = 0.1 + + if x_differential[-1]==0: + x_differential[-1]= 0.1 + + + + x_differential = [x_differential[ind] if x_differential[ind]!=0 else (x_differential[ind-1] + x_differential[ind+1])/2. 
for ind in range(len(x_differential)) ] + + + if x_differential[0]==0.1: + x_differential[0] = x_differential[1] + if x_differential[-1]==0.1: + x_differential[-1] = x_differential[-2] + + x_differential.append(x_differential[0]) + + x_differential = [-1 if x_differential[ind]<0 else 1 for ind in range(len(x_differential))] + + x_differential = self.return_it_in_two_groups(x_differential) + x_differential = np.array(x_differential) + + + con_scaled = con_ind*1 + + con_scaled[:,0, 1] = con_ind[:,0,1] + 8*x_differential + + con_scaled[arg_min_mult,0, 0] = con_ind[arg_min_mult,0,0] + 8 + con_scaled[arg_min_mult+1,0, 0] = con_ind[arg_min_mult+1,0,0] + 8 + + try: + con_scaled[arg_min_mult-1,0, 0] = con_ind[arg_min_mult-1,0,0] + 5 + con_scaled[arg_min_mult+2,0, 0] = con_ind[arg_min_mult+2,0,0] + 5 + except: + pass + + con_scaled[arg_max_mult,0, 0] = con_ind[arg_max_mult,0,0] - 8 + con_scaled[arg_max_mult+1,0, 0] = con_ind[arg_max_mult+1,0,0] - 8 + + try: + con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 + con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + except: + pass + - con_scaled[arg_max_mult-1,0, 0] = con_ind[arg_max_mult-1,0,0] - 5 - con_scaled[arg_max_mult+2,0, 0] = con_ind[arg_max_mult+2,0,0] - 5 + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 - con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 - - all_found_textline_polygons[0][i][:,0,1] = con_scaled[:,0, 1] - all_found_textline_polygons[0][i][:,0,0] = con_scaled[:,0, 0] + all_found_textline_polygons[j][i][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons def run(self): @@ -4207,6 +4219,8 @@ class Eynollah: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + else: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) From 5d680136a4ed752e398cd47d3be0fd5aaf698f13 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 21 Sep 2024 01:04:28 +0200 Subject: [PATCH 30/50] updating light version --- qurator/eynollah/eynollah.py | 45 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index c7407e2..629818f 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -260,7 +260,7 @@ class Eynollah: if self.textline_light: 
self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -1916,11 +1916,7 @@ class Eynollah: prediction_textline_longshot = self.do_prediction(False, img, self.model_textline) prediction_textline_longshot_true_size = resize_image(prediction_textline_longshot, img_h, img_w) - - if self.textline_light: - return (prediction_textline[:, :, 0]==1)*1, (prediction_textline_longshot_true_size[:, :, 0]==1)*1 - else: - return prediction_textline[:, :, 0], prediction_textline_longshot_true_size[:, :, 0] + return ((prediction_textline[:, :, 0]==1)*1).astype('uint8'), ((prediction_textline_longshot_true_size[:, :, 0]==1)*1).astype('uint8') def do_work_of_slopes(self, q, poly, box_sub, boxes_per_process, textline_mask_tot, contours_per_process): @@ -1996,7 +1992,7 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout):# or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) @@ -4066,8 +4062,35 @@ class Eynollah: t1 = time.time() #plt.imshow(table_prediction) #plt.show() - + if self.light_version and num_col_classifier in (1,2): + org_h_l_m = textline_mask_tot_ea.shape[0] + org_w_l_m = textline_mask_tot_ea.shape[1] + if num_col_classifier == 1: + img_w_new = 2000 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + elif num_col_classifier == 2: + img_w_new = 2400 + img_h_new = int(textline_mask_tot_ea.shape[0] / float(textline_mask_tot_ea.shape[1]) * img_w_new) + + image_page = resize_image(image_page,img_h_new, img_w_new ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) + mask_images = resize_image(mask_images,img_h_new, img_w_new ) + mask_lines = resize_image(mask_lines,img_h_new, img_w_new ) + text_regions_p_1 = resize_image(text_regions_p_1,img_h_new, img_w_new ) + table_prediction = resize_image(table_prediction,img_h_new, img_w_new ) + textline_mask_tot, text_regions_p, image_page_rotated = self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines, num_col_classifier, slope_deskew, text_regions_p_1, table_prediction) + + if self.light_version and num_col_classifier in (1,2): + image_page = resize_image(image_page,org_h_l_m, org_w_l_m ) + textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m ) + text_regions_p = resize_image(text_regions_p,org_h_l_m, org_w_l_m ) + textline_mask_tot = resize_image(textline_mask_tot,org_h_l_m, org_w_l_m ) + text_regions_p_1 = resize_image(text_regions_p_1,org_h_l_m, org_w_l_m ) + table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m ) + image_page_rotated = resize_image(image_page_rotated,org_h_l_m, 
org_w_l_m ) + self.logger.info("detection of marginals took %.1fs", time.time() - t1) #print("text region early 2 marginal in %.1fs", time.time() - t0) t1 = time.time() @@ -4222,18 +4245,20 @@ class Eynollah: all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: + textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) else: scale_param = 1 - all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) - all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) + all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: From 7f08458436d1f6aad43f809b3a388c8c275d44f7 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Sat, 21 Sep 2024 
14:39:54 +0200 Subject: [PATCH 31/50] dilation of text regions without opencv --- qurator/eynollah/eynollah.py | 84 +++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 629818f..b2dea47 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -37,9 +37,7 @@ from tensorflow.keras.models import load_model sys.stderr = stderr tf.get_logger().setLevel("ERROR") warnings.filterwarnings("ignore") -from scipy.signal import find_peaks import matplotlib.pyplot as plt -from scipy.ndimage import gaussian_filter1d from tensorflow.python.keras.backend import set_session from tensorflow.keras import layers @@ -2056,8 +2054,8 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -2097,6 +2095,8 @@ class Eynollah: polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) + ##polygons_of_only_texts = self.dilate_textregions_contours(polygons_of_only_texts) + polygons_of_only_lines = return_contours_of_interested_region(mask_lines_only,1,0.00001) @@ -3845,6 +3845,79 @@ class Eynollah: return x_differential_new + def dilate_textregions_contours(self,all_found_textline_polygons): + for j in range(len(all_found_textline_polygons)): + + con_ind = all_found_textline_polygons[j] + + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + x_differential = gaussian_filter1d(x_differential, 3) + y_differential = gaussian_filter1d(y_differential, 3) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + + ###inc_x =list(inc_x) + ###inc_x.append(inc_x[0]) + + ###inc_y =list(inc_y) + ###inc_y.append(inc_y[0]) + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + 
con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons + + + + + + + + def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): @@ -4096,7 +4169,7 @@ class Eynollah: t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - + polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: if not self.light_version: img_bin_light = None @@ -4230,6 +4303,7 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + txt_con_org = self.dilate_textregions_contours(txt_con_org) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) From 62f8ae486043ddf9e39b057e754cc28081275ce3 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 23 Sep 2024 14:03:07 +0200 Subject: [PATCH 32/50] updating dilation of textlines and text regions --- qurator/eynollah/eynollah.py | 96 +++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index b2dea47..fb2d699 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3855,6 +3855,7 @@ class Eynollah: x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) + x_differential = gaussian_filter1d(x_differential, 3) y_differential = gaussian_filter1d(y_differential, 3) @@ -3912,6 +3913,93 @@ class Eynollah: return all_found_textline_polygons + def dilate_textline_contours(self,all_found_textline_polygons): + for j in range(len(all_found_textline_polygons)): + for ij in range(len(all_found_textline_polygons[j])): + + con_ind = all_found_textline_polygons[j][ij] + + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + x_differential = gaussian_filter1d(x_differential, 3) + y_differential = gaussian_filter1d(y_differential, 3) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + + #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) + ##if (y_max-y_min)<40: + ##dilation_m1 = 5 + ##dilation_m2 = int(dilation_m1/2.) +1 + ##else: + ##dilation_m1 = 12 + ##dilation_m2 = int(dilation_m1/2.) 
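The dilate_textregions_contours hunk above is the heart of patch 31's "dilation of text regions without opencv": rather than rasterizing and calling cv2.dilate, every polygon vertex is pushed along an approximate outward normal obtained by rotating the sign of the smoothed coordinate differentials by 90 degrees, with a larger offset (12 px) for axis-aligned segments than for diagonal ones (7 px). A condensed, self-contained sketch of the idea, collapsed to a single offset and with illustrative names rather than the patch's exact code; it assumes an OpenCV-style closed contour of shape (N, 1, 2) whose traversal orientation makes the rotated tangent point outward (for the opposite orientation the offset flips inward):

    import numpy as np
    from scipy.ndimage import gaussian_filter1d

    def dilate_contour(con_ind, amount=7):
        # con_ind: OpenCV-style closed contour, shape (N, 1, 2)
        con = con_ind.astype(float)
        dx = gaussian_filter1d(np.diff(con[:, 0, 0]), 3)  # smoothed tangent, x
        dy = gaussian_filter1d(np.diff(con[:, 0, 1]), 3)  # smoothed tangent, y
        sx, sy = np.sign(dx), np.sign(dy)                 # direction of travel
        inc_x = np.zeros(len(dx) + 1)
        inc_y = np.zeros(len(dy) + 1)
        inc_x[1:] = amount * (-sy)   # rotate the tangent sign by 90 degrees...
        inc_y[1:] = amount * sx      # ...to get a per-vertex normal offset
        inc_x[0], inc_y[0] = inc_x[-1], inc_y[-1]         # close the loop
        con[:, 0, 0] = np.clip(con[:, 0, 0] + inc_x, 0, None)
        con[:, 0, 1] = np.clip(con[:, 0, 1] + inc_y, 0, None)
        return con.astype(con_ind.dtype)

Working on the polygon directly keeps the boundary smooth and avoids the rasterize-dilate-retrace round trip, which is also why the later patches in this series can tune the offset per contour.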
+1 + + if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 1/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + else: + dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + dilation_m2 = int(dilation_m1/2.) +1 + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + ###inc_x =list(inc_x) + ###inc_x.append(inc_x[0]) + + ###inc_y =list(inc_y) + ###inc_y.append(inc_y[0]) + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons @@ -4174,6 +4262,7 @@ class Eynollah: if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) + polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -4304,6 +4393,7 @@ class Eynollah: if self.light_version: txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) txt_con_org = self.dilate_textregions_contours(txt_con_org) + contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) @@ -4316,7 +4406,9 @@ class Eynollah: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, 
image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) - all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) @@ -4508,7 +4600,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) order_text_new = [0] From 6626dc68660d239cf8a4a15b64e8bb670e395409 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 23 Sep 2024 15:50:37 +0200 Subject: [PATCH 33/50] updating textline dilation parameters --- qurator/eynollah/eynollah.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index fb2d699..a69854d 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3950,16 +3950,26 @@ class Eynollah: ##dilation_m1 = 12 ##dilation_m2 = int(dilation_m1/2.) +1 - if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + dilation_m1 = int( (y_max-y_min) * 2/20.0 ) elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + dilation_m1 = int( (x_max-x_min) * 2/20.0 ) elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: dilation_m1 = int( (x_max-x_min) * 1/20.0 ) else: dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + + if dilation_m1>12: + dilation_m1 = 12 + if dilation_m1<4: + dilation_m1 = 4 + #print(dilation_m1, 'dilation_m1') dilation_m2 = int(dilation_m1/2.) 
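Patch 33 above tunes how strongly a textline may be dilated from its bounding-box geometry: very elongated shapes (side ratio below 0.15) may grow by a quarter of their short side, near-square shapes barely grow, and the result is clamped to the range 4-12 px. A compact restatement of that ladder (a hedged paraphrase, not the patch's literal branches):

    def dilation_strength(x_min, x_max, y_min, y_max):
        w, h = x_max - x_min, y_max - y_min
        short, long_ = (h, w) if h <= w else (w, h)
        if long_ > 50 and short / long_ < 0.15:
            d = int(short * 5 / 20.0)  # very flat or very narrow: strongest
        elif long_ > 50 and short / long_ < 0.3:
            d = int(short * 2 / 20.0)
        elif long_ > 50:
            d = int(short * 1 / 20.0)  # near-square: weakest
        else:
            d = int(h * 4 / 20.0)      # small shapes: default on the height
        return min(12, max(4, d))      # the patch's clamp

For a 300 x 20 px line this gives min(12, max(4, int(20 * 5 / 20.0))) = 5 px.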
+1 for i in range(len(x_differential)): From b33739adeef5cd40b48faa3a955cd1d473b5e250 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 24 Sep 2024 16:06:27 +0200 Subject: [PATCH 34/50] parametrization in the case of textline contours dilation is accomplished --- qurator/eynollah/eynollah.py | 35 ++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index a69854d..8c0979d 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -3919,6 +3919,8 @@ class Eynollah: con_ind = all_found_textline_polygons[j][ij] + area = cv2.contourArea(con_ind) + con_ind = con_ind.astype(np.float) x_differential = np.diff( con_ind[:,0,0]) @@ -3943,6 +3945,7 @@ class Eynollah: #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) + #print(area / (x_max-x_min)) ##if (y_max-y_min)<40: ##dilation_m1 = 5 ##dilation_m2 = int(dilation_m1/2.) +1 @@ -3950,20 +3953,26 @@ class Eynollah: ##dilation_m1 = 12 ##dilation_m2 = int(dilation_m1/2.) +1 - if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 5/20.0 ) - elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 2/20.0 ) - elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: - dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 5/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 2/20.0 ) - elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: - dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + #########if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 5/20.0 ) + #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 2/20.0 ) + #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: + #########dilation_m1 = int( (y_max-y_min) * 1/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 5/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 2/20.0 ) + #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: + #########dilation_m1 = int( (x_max-x_min) * 1/20.0 ) + #########else: + #########dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.35) else: - dilation_m1 = int( (y_max-y_min) * 4/20.0 ) + dilation_m1 = round(area / (y_max-y_min) * 0.35) + if dilation_m1>12: dilation_m1 = 12 From 95effe54a0159811b80c7ca5bd9147d196ef5187 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 25 Sep 2024 20:00:53 +0200 Subject: [PATCH 35/50]
updating textregions dilation --- qurator/eynollah/eynollah.py | 151 ++++++++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 12 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 8c0979d..794ebe6 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2054,7 +2054,7 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -3846,18 +3846,22 @@ class Eynollah: return x_differential_new def dilate_textregions_contours(self,all_found_textline_polygons): + #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): con_ind = all_found_textline_polygons[j] - + area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) + con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.1) + con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.1) + x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, 3) - y_differential = gaussian_filter1d(y_differential, 3) + x_differential = gaussian_filter1d(x_differential, .5) + y_differential = gaussian_filter1d(y_differential, .5) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) @@ -3873,23 +3877,54 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.12) + else: + dilation_m1 = round(area / (y_max-y_min) * 0.12) + + if dilation_m1>8: + dilation_m1 = 8 + if dilation_m1<5: + dilation_m1 = 5 + #print(dilation_m1, 'dilation_m1') + dilation_m2 = int(dilation_m1/2.) 
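Patches 34 and 35 then replace that bounding-box ladder with a single area-based rule: contour area divided by the longest extent approximates the mean thickness of the shape, so a fixed fraction of that quotient (0.35 for textlines, 0.12 for text regions, each with its own clamp) scales naturally with script size. A hedged sketch of the rule, with illustrative names:

    import cv2

    def area_based_dilation(contour, factor=0.35, lo=4, hi=12):
        # contour: OpenCV contour of shape (N, 1, 2)
        area = cv2.contourArea(contour)
        x, y, w, h = cv2.boundingRect(contour)
        d = round(area / max(w, h) * factor)  # ~ mean thickness * factor
        return int(min(hi, max(lo, d)))

A 300 x 20 px textline of area about 5400 px² yields round(5400 / 300 * 0.35) = 6 px, independent of how ragged its outline is.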
+1 + for i in range(len(x_differential)): if abs_diff[i]==0: - inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) elif abs_diff[i]!=0 and abs_diff[i]>=3: if abs(x_differential[i])>abs(y_differential[i]): - inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) else: - inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) else: - inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + ###for i in range(len(x_differential)): + ###if abs_diff[i]==0: + ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) + ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + + ###elif abs_diff[i]!=0 and abs_diff[i]>=3: + ###if abs(x_differential[i])>abs(y_differential[i]): + ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) + ###else: + ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) + ###else: + ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) + ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) ###inc_x =list(inc_x) ###inc_x.append(inc_x[0]) @@ -3908,6 +3943,98 @@ class Eynollah: con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + #print(results,'results') + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + #print(area_scaled / area, "ratio") + #print(results,'results') + #if results[0]==1 and diff_result[-1]==-2: + ##indices_2 = indices_2[1:] + ##indices_m2 = indices_m2[1:] + + #con_scaled[:indices_m2[0]+1,0, 1] = con_scaled[indices_m2[-1],0, 1] + #con_scaled[:indices_m2[0]+1,0, 0] = con_scaled[indices_m2[-1],0, 0] + + + #con_scaled[indices_2[-1]+1:,0, 1] = con_scaled[indices_m2[-1],0, 1] + #con_scaled[indices_2[-1]+1:,0, 0] = con_scaled[indices_m2[-1],0, 0] + + #indices_2 = indices_2[:-1] + #indices_m2 = indices_m2[1:-1] + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + #indices_2 = indices_2[1:] + 
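The hunk above guards against a dilated polygon folding back into itself: every shifted vertex is classified against the original contour with cv2.pointPolygonTest, runs of vertices that land back inside show up as +2/-2 jumps in the diff of the inside/outside signs, and those runs are snapped to the last vertex that was still outside. A simplified sketch of the detection step only (illustrative names; the span repair bookkeeping in the patch is more involved):

    import cv2
    import numpy as np

    def folded_spans(con_orig, con_dilated):
        # con_orig, con_dilated: OpenCV contours of shape (N, 1, 2)
        con_orig = con_orig.astype(np.int32)
        # +1 inside, -1 outside, 0 on the edge, for every dilated vertex
        signs = np.array([cv2.pointPolygonTest(con_orig,
                                               (float(p[0]), float(p[1])), False)
                          for p in con_dilated[:, 0, :]])
        signs[signs == 0] = 1              # treat "on the edge" as inside
        jumps = np.diff(signs)
        starts = np.where(jumps == 2)[0]   # a vertex fell back inside here
        ends = np.where(jumps == -2)[0]    # and the contour re-emerged here
        return starts, ends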
indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + indices_2 = indices_2[:-1] + + + + #diff_neg_pos = np.array(indices_m2) - np.array(indices_2) + + + #print(diff_neg_pos,'diff') + ##print(indices_2, 'indices_2') + #indices_2 = np.array(indices_2)[diff_neg_pos>1] + #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] + + for ii in range(len(indices_2)): + + #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] + #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] + + #if x_inner[-1]>=x_inner[0]: + #x_interest = np.min(x_inner) + #else: + #x_interest = np.max(x_inner) + + #if y_inner[-1]>=y_inner[0]: + #y_interest = np.min(y_inner) + #else: + #y_interest = np.max(y_inner) + + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + + + + #con_scaled[:,0, 1][results[:]>0] = con_ind[:,0,1][results[:]>0] + #con_scaled[:,0, 0][results[:]>0] = con_ind[:,0,0][results[:]>0] + + #print(list(results), 'results') + #print(list(diff_result), 'diff_result') + #print(indices_2,'2') + #print(indices_m2,'-2') + #print(diff_neg_pos,'diff_neg_pos') + + #con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) + #con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) + + con_scaled[-1,0, 1] = con_scaled[0,0, 1] + con_scaled[-1,0, 0] = con_scaled[0,0, 0] all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons From 133091137dc01f04eedf153119a04559a8f0633d Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 27 Sep 2024 13:57:01 +0200 Subject: [PATCH 36/50] dilation of textregions and marginals are accomplished --- qurator/eynollah/eynollah.py | 454 ++++++++++++++++++++++++----------- 1 file changed, 314 insertions(+), 140 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 794ebe6..2fe7325 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1050,7 +1050,7 @@ class Eynollah: #del model #gc.collect() return prediction_true - def do_prediction_new_concept(self, patches, img, model, marginal_of_patch_percent=0.1): + def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") img_height_model = model.layers[len(model.layers) - 
1].output_shape[1] @@ -1064,14 +1064,14 @@ class Eynollah: label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - seg_not_base = label_p_pred[0,:,:,4] + #seg_not_base = label_p_pred[0,:,:,4] - seg_not_base[seg_not_base>0.4] =1 - seg_not_base[seg_not_base<1] =0 + #seg_not_base[seg_not_base>0.4] =1 + #seg_not_base[seg_not_base<1] =0 seg = np.argmax(label_p_pred, axis=3)[0] - seg[seg_not_base==1]=4 + #seg[seg_not_base==1]=4 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) @@ -1099,6 +1099,16 @@ class Eynollah: nyf = img_h / float(height_mid) nxf = int(nxf) + 1 if nxf > int(nxf) else int(nxf) nyf = int(nyf) + 1 if nyf > int(nyf) else int(nyf) + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) for i in range(nxf): for j in range(nyf): @@ -1120,44 +1130,57 @@ class Eynollah: if index_y_u > img_h: index_y_u = img_h index_y_d = img_h - img_height_model + + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + - img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] - label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), - verbose=0) - seg = np.argmax(label_p_pred, axis=3)[0] + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + #img_patch = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + #label_p_pred = model.predict(img_patch.reshape(1, img_patch.shape[0], img_patch.shape[1], img_patch.shape[2]), + #verbose=0) + #seg = np.argmax(label_p_pred, axis=3)[0] - seg_not_base = label_p_pred[0,:,:,4] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_not_base = label_p_pred[0,:,:,4] + ########seg2 = -label_p_pred[0,:,:,2] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + ######seg_not_base[seg_not_base>0.03] =1 + ######seg_not_base[seg_not_base<1] =0 - seg_test = label_p_pred[0,:,:,1] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_test = label_p_pred[0,:,:,1] + ########seg2 = -label_p_pred[0,:,:,2] - seg_test[seg_test>0.75] =1 - seg_test[seg_test<1] =0 + ######seg_test[seg_test>0.75] =1 + ######seg_test[seg_test<1] =0 - seg_line = label_p_pred[0,:,:,3] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_line = label_p_pred[0,:,:,3] + ########seg2 = -label_p_pred[0,:,:,2] - seg_line[seg_line>0.1] =1 - seg_line[seg_line<1] =0 + ######seg_line[seg_line>0.1] =1 + ######seg_line[seg_line<1] =0 - seg_background = label_p_pred[0,:,:,0] - ##seg2 = -label_p_pred[0,:,:,2] + ######seg_background = label_p_pred[0,:,:,0] + ########seg2 = -label_p_pred[0,:,:,2] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 + ######seg_background[seg_background>0.25] =1 + ######seg_background[seg_background<1] =0 ##seg = seg+seg2 #seg = label_p_pred[0,:,:,2] #seg[seg>0.4] =1 @@ -1170,56 +1193,221 @@ class Eynollah: ##plt.show() #seg[seg==1]=0 #seg[seg_test==1]=1 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 - seg[(seg_line==1) & (seg==0)]=3 - seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) - - if i == 0 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - 
mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i == 0 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color - elif i == nxf - 1 and j != 0 and j != nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color - elif i != 0 and i != nxf - 1 and j == 0: - seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color - elif i != 0 and i != nxf - 1 and j == nyf - 1: - seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color - else: - seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] - seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] - mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg - prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + ######seg[seg_not_base==1]=4 + ######seg[seg_background==1]=0 + ######seg[(seg_line==1) & (seg==0)]=3 + 
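The long removal around here is the core of patch 36: do_prediction_new_concept no longer calls model.predict once per sliding window, but collects window coordinates until n_batch_inference tiles are queued (or the last tile is reached), predicts the whole batch in one call, and only then crops margins and stitches the results back. A stripped-down sketch of that control flow without the overlap/margin bookkeeping (illustrative names; tile size, batch size, and the zero-padding of edge tiles are simplifying assumptions, the patch shifts edge windows inward instead):

    import numpy as np

    def predict_in_batches(img, model, tile=448, n_batch=4):
        # img: H x W x 3 array; model: a Keras segmentation model
        h, w = img.shape[:2]
        out = np.zeros((h, w), dtype=np.uint8)
        coords, batch = [], []
        for y in range(0, h, tile):
            for x in range(0, w, tile):
                win = img[y:y + tile, x:x + tile]
                patch = np.zeros((tile, tile, 3), dtype=img.dtype)
                patch[:win.shape[0], :win.shape[1]] = win  # zero-pad edge tiles
                coords.append((y, x, win.shape[0], win.shape[1]))
                batch.append(patch)
                last = y + tile >= h and x + tile >= w
                if len(batch) == n_batch or last:          # flush a full batch
                    pred = model.predict(np.stack(batch), verbose=0)
                    seg = np.argmax(pred, axis=3)          # (B, tile, tile)
                    for (yy, xx, hh, ww), s in zip(coords, seg):
                        out[yy:yy + hh, xx:xx + ww] = s[:hh, :ww]
                    coords, batch = [], []
        return out

Batching amortizes the per-call overhead of TensorFlow inference; the stitching itself is unchanged, it simply runs once per batch instead of once per window.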
#seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + + #if i == 0 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + #seg = seg[0 : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i == 0 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - 0, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i == 0 and j != 0 and j != nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - margin, 0 : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + 0 : index_x_u - margin, :] = seg_color + #elif i == nxf - 1 and j != 0 and j != nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - 0] + #mask_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - 0, :] = seg_color + #elif i != 0 and i != nxf - 1 and j == 0: + #seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + #seg = seg[0 : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + 0 : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + #elif i != 0 and i != nxf - 1 and j == nyf - 1: + #seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - 0, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - 0, index_x_d + margin : index_x_u - margin, :] = seg_color + #else: + #seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + #seg = seg[margin : seg.shape[0] - margin, margin : seg.shape[1] - margin] + #mask_true[index_y_d + margin : index_y_u 
- margin, index_x_d + margin : index_x_u - margin] = seg + #prediction_true[index_y_d + margin : index_y_u - margin, index_x_d + margin : index_x_u - margin, :] = seg_color + + + if batch_indexer == n_batch_inference: + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : 
seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + if thresholding_for_some_classes_in_light_version: + seg_not_base = label_p_pred[:,:,:,4] + seg_not_base[seg_not_base>0.03] =1 + seg_not_base[seg_not_base<1] =0 + + seg_line = label_p_pred[:,:,:,3] + seg_line[seg_line>0.1] =1 + seg_line[seg_line<1] =0 + + seg_background = label_p_pred[:,:,:,0] + seg_background[seg_background>0.25] =1 + seg_background[seg_background<1] =0 + + seg[seg_not_base==1]=4 + seg[seg_background==1]=0 + seg[(seg_line==1) & (seg==0)]=3 + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[:,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : 
seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, img_height_model, img_width_model, 3)) prediction_true = prediction_true.astype(np.uint8) return prediction_true @@ -1963,7 +2151,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 800#1000 + img_w_new = 1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -1971,17 +2159,17 @@ class Eynollah: img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: - img_w_new = 1600#2000 + img_w_new = 2000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 4: - img_w_new = 1900#2500 + img_w_new = 2500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 5: - img_w_new = 2300#3000 + img_w_new = 3000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) else: - img_w_new = 3000#4000 + img_w_new = 4000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) img_resized = resize_image(img,img_h_new, img_w_new ) @@ -2025,17 +2213,17 @@ class Eynollah: if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region, n_batch_inference=1) else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light, n_batch_inference=3) prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2) + prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2, n_batch_inference=1) else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region) + prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) @@ -2054,8 +2242,12 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - mask_texts_only = 
cv2.dilate(mask_texts_only, KERNEL, iterations=1) + ##if num_col_classifier == 1 or num_col_classifier == 2: + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + + mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) + mask_images_only=(prediction_regions_org[:,:] ==2)*1 @@ -3150,7 +3342,14 @@ class Eynollah: pixel_img = 4 min_area_mar = 0.00001 - polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) + if self.light_version: + marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = marginal_mask.astype('uint8') + marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) + + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) + else: + polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) @@ -3241,7 +3440,15 @@ class Eynollah: pixel_img = 4 min_area_mar = 0.00001 - polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) + + if self.light_version: + marginal_mask = (text_regions_p[:,:]==pixel_img)*1 + marginal_mask = marginal_mask.astype('uint8') + marginal_mask = cv2.dilate(marginal_mask, KERNEL, iterations=2) + + polygons_of_marginals = return_contours_of_interested_region(marginal_mask, 1, min_area_mar) + else: + polygons_of_marginals = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) @@ -3850,18 +4057,19 @@ class Eynollah: for j in range(len(all_found_textline_polygons)): con_ind = all_found_textline_polygons[j] + #print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.1) - con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.1) + #con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.5) + #con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.5) x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) - x_differential = gaussian_filter1d(x_differential, .5) - y_differential = gaussian_filter1d(y_differential, .5) + x_differential = gaussian_filter1d(x_differential, 0.1) + y_differential = gaussian_filter1d(y_differential, 0.1) x_min = float(np.min( con_ind[:,0,0] )) y_min = float(np.min( con_ind[:,0,1] )) @@ -3885,8 +4093,8 @@ class Eynollah: if dilation_m1>8: dilation_m1 = 8 - if dilation_m1<5: - dilation_m1 = 5 + if dilation_m1<6: + dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') dilation_m2 = int(dilation_m1/2.) 
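The two run_boxes_* hunks above switch the light version's marginal extraction from tracing contours straight off the label image to building an explicit binary mask that is dilated (iterations=2) before contour extraction, so fragmented marginal predictions merge into one region per margin. A hedged sketch; KERNEL is eynollah's global structuring element, for which a 3x3 ones kernel stands in here, and the relative min-area filter mirrors what return_contours_of_interested_region does:

    import cv2
    import numpy as np

    def marginal_polygons(text_regions_p, marginal_label=4, min_area=0.00001):
        kernel = np.ones((3, 3), np.uint8)             # stand-in for KERNEL
        mask = (text_regions_p == marginal_label).astype(np.uint8)
        mask = cv2.dilate(mask, kernel, iterations=2)  # merge fragments
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        page_area = mask.shape[0] * mask.shape[1]
        return [c for c in contours
                if cv2.contourArea(c) >= min_area * page_area]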
+1 @@ -4002,7 +4210,6 @@ class Eynollah: #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] for ii in range(len(indices_2)): - #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] @@ -4030,11 +4237,12 @@ class Eynollah: #print(indices_m2,'-2') #print(diff_neg_pos,'diff_neg_pos') - #con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) - #con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) + ##con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) + ##con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) - con_scaled[-1,0, 1] = con_scaled[0,0, 1] - con_scaled[-1,0, 0] = con_scaled[0,0, 0] + #con_scaled[-1,0, 1] = con_scaled[0,0, 1] + #con_scaled[-1,0, 0] = con_scaled[0,0, 0] + ##print(len(con_scaled[:,0,0]),'con_scaled[:,0,0]') all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons @@ -4045,7 +4253,7 @@ class Eynollah: for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] - + print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -4069,31 +4277,6 @@ class Eynollah: inc_x = np.zeros(len(x_differential)+1) inc_y = np.zeros(len(x_differential)+1) - - - #print(y_max-y_min, x_max-x_min,(y_max-y_min)/(x_max-x_min), (x_max-x_min)/(y_max-y_min) ) - #print(area / (x_max-x_min)) - ##if (y_max-y_min)<40: - ##dilation_m1 = 5 - ##dilation_m2 = int(dilation_m1/2.) +1 - ##else: - ##dilation_m1 = 12 - ##dilation_m2 = int(dilation_m1/2.) +1 - - #########if (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))<0.15 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 5/20.0 ) - #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.15 and ((y_max-y_min)/(x_max-x_min))<0.3 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 2/20.0 ) - #########elif (y_max-y_min) <= (x_max-x_min) and ((y_max-y_min)/(x_max-x_min))>=0.3 and (x_max-x_min)>50: - #########dilation_m1 = int( (y_max-y_min) * 1/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))<0.15 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 5/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.15 and ((x_max-x_min)/(y_max-y_min))<0.3 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 2/20.0 ) - #########elif (x_max-x_min) < (y_max-y_min) and ((x_max-x_min)/(y_max-y_min))>=0.3 and (y_max-y_min)>50: - #########dilation_m1 = int( (x_max-x_min) * 1/20.0 ) - #########else: - #########dilation_m1 = int( (y_max-y_min) * 4/20.0 ) if (y_max-y_min) <= (x_max-x_min): dilation_m1 = round(area / (x_max-x_min) * 0.35) @@ -4126,11 +4309,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - ###inc_x =list(inc_x) - ###inc_x.append(inc_x[0]) - - ###inc_y =list(inc_y) - ###inc_y.append(inc_y[0]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4146,11 +4324,6 @@ class Eynollah: all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons - - - - - def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): @@ -4403,12 +4576,12 @@ class Eynollah: t1 = time.time() if not 
self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) - polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) - polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -4537,9 +4710,10 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: - txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) - txt_con_org = self.dilate_textregions_contours(txt_con_org) contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) + #txt_con_org = self.dilate_textregions_contours(txt_con_org) + #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) #print("text region early 4 in %.1fs", time.time() - t0) From ad323162173f651e9c5f2cb28804c23a582432d5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 27 Sep 2024 20:59:01 +0200 Subject: [PATCH 37/50] updating light version --- qurator/eynollah/eynollah.py | 46 +++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2fe7325..72a72d9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -541,6 +541,7 @@ class 
Eynollah: img = self.imread() _, page_coord = self.early_page_for_num_of_column_classification(img) + if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) if self.input_binary: @@ -611,6 +612,10 @@ class Eynollah: width_early = img.shape[1] t1 = time.time() _, page_coord = self.early_page_for_num_of_column_classification(img_bin) + + self.image_page_org_size = img[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3], :] + self.page_coord = page_coord + if not self.dir_in: model_num_classifier, session_col_classifier = self.start_new_session_and_model(self.model_dir_of_col_classifier) @@ -737,7 +742,7 @@ class Eynollah: def get_image_and_scales_after_enhancing(self, img_org, img_res): self.logger.debug("enter get_image_and_scales_after_enhancing") self.image = np.copy(img_res) - self.image = self.image.astype(np.uint8) + #self.image = self.image.astype(np.uint8) self.image_org = np.copy(img_org) self.height_org = self.image_org.shape[0] self.width_org = self.image_org.shape[1] @@ -1059,19 +1064,18 @@ class Eynollah: if not patches: img_h_page = img.shape[0] img_w_page = img.shape[1] - img = img / float(255.0) + img = img / 255.0 img = resize_image(img, img_height_model, img_width_model) label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2]), verbose=0) - - #seg_not_base = label_p_pred[0,:,:,4] - - #seg_not_base[seg_not_base>0.4] =1 - #seg_not_base[seg_not_base<1] =0 - seg = np.argmax(label_p_pred, axis=3)[0] - #seg[seg_not_base==1]=4 + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[0,:,:,4] + seg_art[seg_art<0.1] =0 + seg_art[seg_art>0] =1 + seg[seg_art==1]=4 + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) @@ -2151,7 +2155,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 1000 + img_w_new = 800 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -2206,29 +2210,39 @@ class Eynollah: textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h ) + + #print(self.image_org.shape) + + #plt.imshwo(self.image_page_org_size) + #plt.show() if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) - #print(img_resized.shape, num_col_classifier, "num_col_classifier") if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region, n_batch_inference=1) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = False) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light, n_batch_inference=3) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) ##model_region, session_region = 
self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2, n_batch_inference=1) + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=False) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) + #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() + prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h ) @@ -3195,7 +3209,7 @@ class Eynollah: scale = 1 if is_image_enhanced: if self.allow_enhancement: - img_res = img_res.astype(np.uint8) + #img_res = img_res.astype(np.uint8) self.get_image_and_scales(img_org, img_res, scale) if self.plotter: self.plotter.save_enhanced_image(img_res) From 1774076f4a9536ae68d9ab0a982bb84f65c8d858 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 30 Sep 2024 16:10:29 +0200 Subject: [PATCH 38/50] updating light version. Remove textlines or textregion contours inside a bigger one --- qurator/eynollah/eynollah.py | 124 ++++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 10 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 72a72d9..cbc7b88 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylay12sp_0_2"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1071,8 +1071,13 @@ class Eynollah: seg = np.argmax(label_p_pred, axis=3)[0] if thresholding_for_artificial_class_in_light_version: + #seg_text = label_p_pred[0,:,:,1] + #seg_text[seg_text<0.2] =0 + #seg_text[seg_text>0] =1 + #seg[seg_text==1]=1 + seg_art = label_p_pred[0,:,:,4] - seg_art[seg_art<0.1] =0 + seg_art[seg_art<0.2] =0 seg_art[seg_art>0] =1 seg[seg_art==1]=4 @@ 
-2159,7 +2164,7 @@ class Eynollah: img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: - img_w_new = 1300#1500 + img_w_new = 1500#1500 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 3: @@ -2222,7 +2227,7 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier == 2: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = False) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) @@ -2232,7 +2237,7 @@ class Eynollah: else: if num_col_classifier == 1 or num_col_classifier == 2: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=False) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) @@ -2249,16 +2254,19 @@ class Eynollah: img_bin = resize_image(img_bin,img_height_h, img_width_h ) prediction_regions_org=prediction_regions_org[:,:,0] + mask_lines_only = (prediction_regions_org[:,:] ==3)*1 + + mask_texts_only = (prediction_regions_org[:,:] ==1)*1 mask_texts_only = mask_texts_only.astype('uint8') - ##if num_col_classifier == 1 or num_col_classifier == 2: - ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + #if num_col_classifier == 1 or num_col_classifier == 2: + #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) @@ -4110,6 +4118,7 @@ class Eynollah: if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') + dilation_m1 = 5 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): @@ -4267,7 +4276,6 @@ class Eynollah: for ij in range(len(all_found_textline_polygons[j])): con_ind = all_found_textline_polygons[j][ij] - print(len(con_ind[:,0,0]),'con_ind[:,0,0]') area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) @@ -4303,7 +4311,7 @@ class Eynollah: if dilation_m1<4: dilation_m1 = 4 #print(dilation_m1, 'dilation_m1') - dilation_m2 = int(dilation_m1/2.) +1 + dilation_m2 = int(dilation_m1/2.) 
+1 for i in range(len(x_differential)): if abs_diff[i]==0: @@ -4339,6 +4347,100 @@ class Eynollah: all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons + def filter_contours_inside_a_bigger_one(self,contours, image, marginal_cnts=None, type_contour="textregion"): + if type_contour=="textregion": + areas = [cv2.contourArea(contours[j]) for j in range(len(contours))] + area_tot = image.shape[0]*image.shape[1] + + M_main = [cv2.moments(contours[j]) for j in range(len(contours))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot + contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] + contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] + + #contours_> = [contours[ind] for ind in contours_index_big] + indexes_to_be_removed = [] + for ind_small in contours_index_small: + results = [cv2.pointPolygonTest(contours[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big ] + if marginal_cnts: + results_marginal = [cv2.pointPolygonTest(marginal_cnts[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in range(len(marginal_cnts)) ] + results_marginal = np.array(results_marginal) + + if np.any(results_marginal==1): + indexes_to_be_removed.append(ind_small) + + results = np.array(results) + + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + + + if len(indexes_to_be_removed)>0: + indexes_to_be_removed = np.unique(indexes_to_be_removed) + for ind in indexes_to_be_removed: + contours.pop(ind) + return contours + + + else: + contours_txtline_of_all_textregions = [] + + for jj in range(len(contours)): + contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] + + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] + cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + + areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] + area_tot_tot = image.shape[0]*image.shape[1] + + areas_ratio_tot = np.array(areas_tot)/ area_tot_tot + + contours_index_big_tot = [ind for ind in range(len(contours_txtline_of_all_textregions)) if areas_ratio_tot[ind] >= 1e-2] + + + for jj in range(len(contours)): + contours_in = contours[jj] + #print(len(contours_in)) + areas = [cv2.contourArea(con_ind) for con_ind in contours_in] + area_tot = image.shape[0]*image.shape[1] + + M_main = [cv2.moments(contours_in[j]) for j in range(len(contours_in))] + cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot + + if len(areas_ratio)>=1: + #print(np.max(areas_ratio), np.min(areas_ratio)) + contours_index_small = [ind for ind in range(len(contours_in)) if areas_ratio[ind] < 1e-2] + #contours_index_big = [ind for ind in range(len(contours_in)) if areas_ratio[ind] >= 1e-3] + + if len(contours_index_small)>0: + indexes_to_be_removed = [] + for ind_small in contours_index_small: + results = 
[cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big_tot ] + + results = np.array(results) + + if np.any(results==1): + indexes_to_be_removed.append(ind_small) + + + if len(indexes_to_be_removed)>0: + indexes_to_be_removed = np.unique(indexes_to_be_removed) + + for ind in indexes_to_be_removed: + contours[jj].pop(ind) + + return contours + + + + def dilate_textlines(self,all_found_textline_polygons): for j in range(len(all_found_textline_polygons)): for i in range(len(all_found_textline_polygons[j])): @@ -4725,6 +4827,7 @@ class Eynollah: #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) + contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) @@ -4742,6 +4845,7 @@ class Eynollah: #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) else: From ab63d5ba408a3dfe42ee897b5e6976d4fc501bdd Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 30 Sep 2024 21:28:39 +0200 Subject: [PATCH 39/50] updating light version features --- qurator/eynollah/eynollah.py | 105 +++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 42 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index cbc7b88..61289fa 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2225,10 +2225,13 @@ class Eynollah: if not self.dir_in: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + else: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) 
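The filter_contours_inside_a_bigger_one method introduced and then refined in the hunks above decides nesting purely by centroid containment: a contour is dropped when its area ratio is small and its centroid (taken from image moments) falls inside some sufficiently large contour, or inside a marginal contour. A minimal self-contained sketch of that test, assuming OpenCV-style contour arrays of shape (N, 1, 2) held in a Python list; the function name and threshold here are illustrative, not the eynollah API:

    # Illustrative sketch, not eynollah code: drop small contours whose
    # centroid falls inside any sufficiently large contour.
    import cv2
    import numpy as np

    def drop_nested_contours(contours, image_shape, small_ratio=1e-3):
        area_total = image_shape[0] * image_shape[1]
        ratios = np.array([cv2.contourArea(c) for c in contours]) / area_total
        # Centroids from image moments; 1e-32 guards zero-area contours.
        moments = [cv2.moments(c) for c in contours]
        cx = [m["m10"] / (m["m00"] + 1e-32) for m in moments]
        cy = [m["m01"] / (m["m00"] + 1e-32) for m in moments]
        small = [i for i, r in enumerate(ratios) if r < small_ratio]
        big = [i for i, r in enumerate(ratios) if r >= small_ratio]
        to_remove = []
        for i in small:
            # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside.
            if any(cv2.pointPolygonTest(contours[j], (cx[i], cy[i]), False) > 0
                   for j in big):
                to_remove.append(i)
        # Pop in descending order so earlier indices stay valid (the same
        # reason this patch adds np.sort(indexes_to_be_removed)[::-1]).
        for i in sorted(set(to_remove), reverse=True):
            contours.pop(i)
        return contours

Centroid containment is cheap but approximate: a small contour straddling the border of a big one is kept or dropped depending only on where its centroid lands.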
prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) @@ -2236,9 +2239,12 @@ class Eynollah: ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: - prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) - prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) - prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page + if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + else: + prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) + prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) @@ -4356,6 +4362,8 @@ class Eynollah: cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + + areas_ratio = np.array(areas)/ area_tot contours_index_small = [ind for ind in range(len(contours)) if areas_ratio[ind] < 1e-3] contours_index_big = [ind for ind in range(len(contours)) if areas_ratio[ind] >= 1e-3] @@ -4379,64 +4387,75 @@ class Eynollah: if len(indexes_to_be_removed)>0: indexes_to_be_removed = np.unique(indexes_to_be_removed) + indexes_to_be_removed = np.sort(indexes_to_be_removed)[::-1] for ind in indexes_to_be_removed: contours.pop(ind) + return contours else: contours_txtline_of_all_textregions = [] + indexes_of_textline_tot = [] + index_textline_inside_textregion = [] for jj in range(len(contours)): contours_txtline_of_all_textregions = contours_txtline_of_all_textregions + contours[jj] + ind_ins = np.zeros( len(contours[jj]) ) + jj + list_ind_ins = list(ind_ins) + + ind_textline_inside_tr = np.array (range(len(contours[jj])) ) + + list_ind_textline_inside_tr = list(ind_textline_inside_tr) + + index_textline_inside_textregion = index_textline_inside_textregion + list_ind_textline_inside_tr + + indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins + + M_main_tot = [cv2.moments(contours_txtline_of_all_textregions[j]) for j in range(len(contours_txtline_of_all_textregions))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + areas_tot = [cv2.contourArea(con_ind) for con_ind in contours_txtline_of_all_textregions] area_tot_tot = image.shape[0]*image.shape[1] - areas_ratio_tot = np.array(areas_tot)/ area_tot_tot - - 
contours_index_big_tot = [ind for ind in range(len(contours_txtline_of_all_textregions)) if areas_ratio_tot[ind] >= 1e-2] - - - for jj in range(len(contours)): - contours_in = contours[jj] - #print(len(contours_in)) - areas = [cv2.contourArea(con_ind) for con_ind in contours_in] - area_tot = image.shape[0]*image.shape[1] + textregion_index_to_del = [] + textline_in_textregion_index_to_del = [] + for ij in range(len(contours_txtline_of_all_textregions)): - M_main = [cv2.moments(contours_in[j]) for j in range(len(contours_in))] - cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] - cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))] + args_all = list(np.array(range(len(contours_txtline_of_all_textregions)))) - areas_ratio = np.array(areas)/ area_tot + args_all.pop(ij) - if len(areas_ratio)>=1: - #print(np.max(areas_ratio), np.min(areas_ratio)) - contours_index_small = [ind for ind in range(len(contours_in)) if areas_ratio[ind] < 1e-2] - #contours_index_big = [ind for ind in range(len(contours_in)) if areas_ratio[ind] >= 1e-3] - - if len(contours_index_small)>0: - indexes_to_be_removed = [] - for ind_small in contours_index_small: - results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main[ind_small], cy_main[ind_small]), False) for ind in contours_index_big_tot ] - - results = np.array(results) + areas_without = np.array(areas_tot)[args_all] + area_of_con_interest = areas_tot[ij] + + args_with_bigger_area = np.array(args_all)[areas_without > area_of_con_interest] + + if len(args_with_bigger_area)>0: + results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ] + results = np.array(results) + if np.any(results==1): + #print(indexes_of_textline_tot[ij], index_textline_inside_textregion[ij]) + textregion_index_to_del.append(int(indexes_of_textline_tot[ij])) + textline_in_textregion_index_to_del.append(int(index_textline_inside_textregion[ij])) + #contours[int(indexes_of_textline_tot[ij])].pop(int(index_textline_inside_textregion[ij])) - if np.any(results==1): - indexes_to_be_removed.append(ind_small) - - - if len(indexes_to_be_removed)>0: - indexes_to_be_removed = np.unique(indexes_to_be_removed) - - for ind in indexes_to_be_removed: - contours[jj].pop(ind) - - return contours + uniqe_args_trs = np.unique(textregion_index_to_del) + + for ind_u_a_trs in uniqe_args_trs: + textline_in_textregion_index_to_del_ind = np.array(textline_in_textregion_index_to_del)[np.array(textregion_index_to_del)==ind_u_a_trs] + textline_in_textregion_index_to_del_ind = np.sort(textline_in_textregion_index_to_del_ind)[::-1] + + for ittrd in textline_in_textregion_index_to_del_ind: + contours[ind_u_a_trs].pop(ittrd) + + return contours + + @@ -4852,6 +4871,8 @@ class Eynollah: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea, image_page_rotated, boxes_marginals, slope_deskew) + + 
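The np.sort(indexes_to_be_removed)[::-1] and the reverse-sorted textline_in_textregion_index_to_del_ind added above are load-bearing: popping list elements in ascending index order shifts every later index, so the wrong items get deleted. A tiny standalone demonstration:

    # Why removals are done back to front: ascending pops shift later indices.
    items = ['a', 'b', 'c', 'd', 'e']
    for i in sorted([1, 3]):                 # ascending: pops 'b', then 'e'
        items.pop(i)                         # index 3 moved after the first pop
    assert items == ['a', 'c', 'd']          # 'd' survived, 'e' is gone: wrong

    items = ['a', 'b', 'c', 'd', 'e']
    for i in sorted([1, 3], reverse=True):   # descending: pops 'd', then 'b'
        items.pop(i)
    assert items == ['a', 'c', 'e']          # exactly the intended removals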
#all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline")
         else:
             textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1)
             slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new(txt_con_org, contours_only_text_parent, textline_mask_tot_ea, image_page_rotated, boxes_text, slope_deskew)

From 543ed4bc38b94acf53f48a9224b97322cade0e5b Mon Sep 17 00:00:00 2001
From: vahidrezanezhad
Date: Wed, 2 Oct 2024 14:09:13 +0200
Subject: [PATCH 40/50] -light version needs -tll to be enabled, otherwise the
 process will exit.

---
 qurator/eynollah/cli.py      |  3 ++
 qurator/eynollah/eynollah.py | 63 +++++++++++++++++++++++++++++++++++-
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py
index b293403..4c762a8 100644
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@@ -227,6 +227,9 @@ def layout(image, out, dir_in, model, save_images, save_layout, save_deskewed, s
     if textline_light and not light_version:
         print('Error: You used -tll to enable light textline detection but -light is not enabled')
         sys.exit(1)
+    if light_version and not textline_light:
+        print('Error: You used -light without -tll. The light version needs light textline detection (-tll) to be enabled.')
+        sys.exit(1)
     eynollah = Eynollah(
         image_filename=image,
         dir_out=out,
diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py
index 61289fa..6b8193c 100644
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@@ -252,7 +252,7 @@ class Eynollah:
         self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425"
         self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314"
         self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based"
-        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlylay12sp_0_2"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
+        self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige"
         ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans"
         self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans"
         if self.textline_light:
@@ -1055,6 +1055,35 @@ class Eynollah:
         #del model
         #gc.collect()
         return prediction_true
+    def do_padding_with_scale(self,img, scale):
+        h_n = int(img.shape[0]*scale)
+        w_n = int(img.shape[1]*scale)
+
+        channel0_avg = int( np.mean(img[:,:,0]) )
+        channel1_avg = int( np.mean(img[:,:,1]) )
+        channel2_avg = int( np.mean(img[:,:,2]) )
+
+        h_diff = img.shape[0] - h_n
+        w_diff = img.shape[1] - w_n
+
+        h_start = int(h_diff / 2.)
+        w_start = int(w_diff / 2.)
+ + img_res = resize_image(img, h_n, w_n) + #label_res = resize_image(label, h_n, w_n) + + img_scaled_padded = np.copy(img) + + #label_scaled_padded = np.zeros(label.shape) + + img_scaled_padded[:,:,0] = channel0_avg + img_scaled_padded[:,:,1] = channel1_avg + img_scaled_padded[:,:,2] = channel2_avg + + img_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = img_res[:,:,:] + #label_scaled_padded[h_start:h_start+h_n, w_start:w_start+w_n,:] = label_res[:,:,:] + + return img_scaled_padded#, label_scaled_padded def do_prediction_new_concept(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False, thresholding_for_artificial_class_in_light_version=False): self.logger.debug("enter do_prediction") @@ -4349,6 +4378,38 @@ class Eynollah: con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + indices_2 = indices_2[:-1] + + + for ii in range(len(indices_2)): + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons From 1da4b7f589af94beea75157b80c0a7ecb6a213de Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 7 Oct 2024 10:55:10 +0200 Subject: [PATCH 41/50] updating light version --- qurator/eynollah/eynollah.py | 41 ++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 6b8193c..2c14ab9 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_earlyla_12_0_2_con_18_22"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" 
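The indices_2/indices_m2 bookkeeping in the textline-dilation hunk above locates stretches where the scaled contour has crossed back inside the original polygon: the pointPolygonTest results are clamped to +/-1, and np.diff then flags each entry (+2) and exit (-2) of such a stretch, whose points are reverted to the original contour or held at the run boundary. A small NumPy-only sketch of that transition trick, with made-up sample values:

    import numpy as np

    # results[i] is +1 where a scaled contour point lies inside the original
    # contour, -1 where outside (0, i.e. on the edge, is clamped to +1).
    results = np.array([-1, -1, 1, 1, 1, -1, -1, 1, 1, -1])
    diff = np.diff(results)
    entries = np.where(diff == 2)[0]   # last index before an inside-run starts
    exits = np.where(diff == -2)[0]    # last index of each inside-run
    print(entries, exits)              # [1 6] [4 8]
    # Each span entries[k]+1 .. exits[k] marks points the scaling pushed the
    # wrong way; the patch overwrites them instead of keeping the scaled ones.

The special cases in the patch (results[0]==1 and len(indices_2)>len(indices_m2)) handle runs that begin or end at the array boundary, where one of the two transitions never occurs.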
self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -2189,7 +2189,7 @@ class Eynollah: #print(num_col_classifier,'num_col_classifier') if num_col_classifier == 1: - img_w_new = 800 + img_w_new = 1000 img_h_new = int(img_org.shape[0] / float(img_org.shape[1]) * img_w_new) elif num_col_classifier == 2: @@ -2299,9 +2299,9 @@ class Eynollah: mask_texts_only = mask_texts_only.astype('uint8') - #if num_col_classifier == 1 or num_col_classifier == 2: - #mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) - #mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) + ##if num_col_classifier == 1 or num_col_classifier == 2: + ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) + ##mask_texts_only = cv2.dilate(mask_texts_only, KERNEL, iterations=1) mask_texts_only = cv2.dilate(mask_texts_only, kernel=np.ones((2,2), np.uint8), iterations=1) @@ -4153,7 +4153,7 @@ class Eynollah: if dilation_m1<6: dilation_m1 = 6 #print(dilation_m1, 'dilation_m1') - dilation_m1 = 5 + dilation_m1 = 6 dilation_m2 = int(dilation_m1/2.) +1 for i in range(len(x_differential)): @@ -4657,6 +4657,31 @@ class Eynollah: all_found_textline_polygons[j][i][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons + + def delete_regions_without_textlines(self,slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con): + slopes_rem = [] + all_found_textline_polygons_rem = [] + boxes_text_rem = [] + txt_con_org_rem = [] + contours_only_text_parent_rem = [] + index_by_text_par_con_rem = [] + + for i, ind_con in enumerate(all_found_textline_polygons): + if len(ind_con): + all_found_textline_polygons_rem.append(ind_con) + slopes_rem.append(slopes[i]) + boxes_text_rem.append(boxes_text[i]) + txt_con_org_rem.append(txt_con_org[i]) + contours_only_text_parent_rem.append(contours_only_text_parent[i]) + index_by_text_par_con_rem.append(index_by_text_par_con[i]) + + index_sort = np.argsort(index_by_text_par_con_rem) + indexes_new = np.array(range(len(index_by_text_par_con_rem))) + + index_by_text_par_con_rem_sort = [indexes_new[index_sort==j][0] for j in range(len(index_by_text_par_con_rem))] + + return slopes_rem, all_found_textline_polygons_rem, boxes_text_rem, txt_con_org_rem, contours_only_text_parent_rem, index_by_text_par_con_rem_sort + def run(self): """ Get image and scales, then extract the page of scanned image @@ -4923,6 +4948,9 @@ class Eynollah: slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) + + #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, 
all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") @@ -5121,6 +5149,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") order_text_new = [0] From 3ef4eac24ca5d876243c62860ad9d4fa05110081 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Thu, 17 Oct 2024 19:12:28 +0200 Subject: [PATCH 42/50] textlines of textregions are extracted in a faster way + early layout for all documents is done with no patches model and on rgb input --- qurator/eynollah/eynollah.py | 120 +++++++++++++++++++--------- qurator/eynollah/utils/marginals.py | 65 ++------------- 2 files changed, 89 insertions(+), 96 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 2c14ab9..fd66b81 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -252,7 +252,7 @@ class Eynollah: self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" self.model_region_dir_p_ens_light = dir_models + "/eynollah-main-regions_20220314" self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" - self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" + self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: @@ -1710,6 +1710,36 @@ class Eynollah: self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions2 + def get_slopes_and_deskew_new_light2(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): + + polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) + + M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] + cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] + + args_textlines = np.array(range(len(polygons_of_textlines))) + all_found_textline_polygons = [] + slopes = [] + all_box_coord =[] + + for index, con_region_ind in enumerate(contours_par): + results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False) for ind in 
args_textlines ] + results = np.array(results) + + indexes_in = args_textlines[results==1] + + textlines_ins = [polygons_of_textlines[ind] for ind in indexes_in] + + all_found_textline_polygons.append(textlines_ins) + slopes.append(0) + + _, crop_coor = crop_image_inside_box(boxes[index],image_page_rotated) + + all_box_coord.append(crop_coor) + + return slopes, all_found_textline_polygons, boxes, contours, contours_par, all_box_coord, np.array(range(len(contours_par))) + def get_slopes_and_deskew_new_light(self, contours, contours_par, textline_mask_tot, image_page_rotated, boxes, slope_deskew): self.logger.debug("enter get_slopes_and_deskew_new") if len(contours)>15: @@ -2099,14 +2129,14 @@ class Eynollah: img = resize_image(img_org, int(img_org.shape[0] * scaler_h), int(img_org.shape[1] * scaler_w)) if not self.dir_in: - prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3, thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) #prediction_textline[:,:][prediction_textline_nopatch[:,:]==0] = 0 else: - prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.2, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) + prediction_textline = self.do_prediction(patches, img, self.model_textline, marginal_of_patch_percent=0.15, n_batch_inference=3,thresholding_for_artificial_class_in_light_version=thresholding_for_artificial_class_in_light_version) #if not thresholding_for_artificial_class_in_light_version: #if num_col_classifier==1: #prediction_textline_nopatch = self.do_prediction(False, img, model_textline) @@ -2216,14 +2246,14 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 3): + if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): if not self.dir_in: model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) else: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - #print("inside bin ", time.time()-t_bin) + print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2236,7 +2266,7 @@ class Eynollah: else: img_bin = np.copy(img_resized) - #print("inside 1 ", time.time()-t_in) + print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2246,14 +2276,15 @@ class Eynollah: #print(self.image_org.shape) + #cv2.imwrite('out_13.png', self.image_page_org_size) #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: - #print("inside 2 ", time.time()-t_in) + 
print("inside 2 ", time.time()-t_in) if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier == 2: + if num_col_classifier == 1 or num_col_classifier >= 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) @@ -2267,7 +2298,7 @@ class Eynollah: ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - if num_col_classifier == 1 or num_col_classifier == 2: + if num_col_classifier == 1 or num_col_classifier >= 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) else: @@ -2278,7 +2309,7 @@ class Eynollah: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - #print("inside 3 ", time.time()-t_in) + print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2356,7 +2387,15 @@ class Eynollah: text_regions_p_true[:,:][mask_images_only[:,:] == 1] = 2 text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1)) - #print("inside 4 ", time.time()-t_in) + + #plt.imshow(textline_mask_tot_ea) + #plt.show() + + textline_mask_tot_ea[(text_regions_p_true==0) | (text_regions_p_true==4) ] = 0 + + #plt.imshow(textline_mask_tot_ea) + #plt.show() + print("inside 4 ", time.time()-t_in) return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) @@ -3308,7 +3347,7 @@ class Eynollah: if self.tables: regions_without_separators[table_prediction==1] = 1 regions_without_separators = regions_without_separators.astype(np.uint8) - text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=KERNEL) + text_regions_p = get_marginals(rotate_image(regions_without_separators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, light_version=self.light_version, kernel=KERNEL) except Exception as e: self.logger.error("exception %s", e) @@ -3319,6 +3358,7 @@ class Eynollah: def run_boxes_no_full_layout(self, image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts): self.logger.debug('enter run_boxes_no_full_layout') + t_0_box = time.time() if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, textline_mask_tot_d, text_regions_p_1_n, table_prediction_n = rotation_not_90_func(image_page, textline_mask_tot, text_regions_p, table_prediction, slope_deskew) text_regions_p_1_n = resize_image(text_regions_p_1_n, text_regions_p.shape[0], text_regions_p.shape[1]) @@ -3328,6 +3368,7 @@ class Eynollah: if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( 
(text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) + print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3340,7 +3381,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - + print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: @@ -3350,6 +3391,7 @@ class Eynollah: else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) + print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) @@ -3378,7 +3420,7 @@ class Eynollah: img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - + print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: @@ -3410,7 +3452,7 @@ class Eynollah: pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - + print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables @@ -3751,8 +3793,10 @@ class Eynollah: img_poly[text_regions_p[:,:]==3] = 4 img_poly[text_regions_p[:,:]==6] = 5 - - model_ro_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) + if self.dir_in: + pass + else: + self.model_reading_order_machine, _ = self.start_new_session_and_model(self.model_reading_order_machine_dir) height1 =672#448 width1 = 448#224 @@ -3793,7 +3837,7 @@ class Eynollah: img3 = img3.astype(np.uint16) - inference_bs = 4 + inference_bs = 3 input_1= np.zeros( (inference_bs, height1, width1,3)) starting_list_of_regions = [] starting_list_of_regions.append( list(range(labels_con.shape[2])) ) @@ -3835,7 +3879,7 @@ class Eynollah: batch_counter = batch_counter+1 if batch_counter==inference_bs or ( (tot_counter//inference_bs)==full_bs_ite and tot_counter%inference_bs==last_bs): - y_pr=model_ro_machine.predict(input_1 , verbose=0) + y_pr=self.model_reading_order_machine.predict(input_1 , verbose=0) if batch_counter==inference_bs: iteration_batches = inference_bs @@ -4698,16 +4742,16 @@ class Eynollah: t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - #print("text region early -11 in %.1fs", time.time() - t0) + print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - #print("text region 
early -1 in %.1fs", time.time() - t0) + print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - #print("text region early -2 in %.1fs", time.time() - t0) + print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: @@ -4720,17 +4764,17 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) - #print("text region early -2,5 in %.1fs", time.time() - t0) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) + print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) - #print("text region early -3 in %.1fs", time.time() - t0) + print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - #print("text region early -4 in %.1fs", time.time() - t0) + print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4751,7 +4795,7 @@ class Eynollah: continue else: return pcgts - #print("text region early in %.1fs", time.time() - t0) + print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -4793,7 +4837,8 @@ class Eynollah: image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) - #print("text region early 2 marginal in %.1fs", time.time() - t0) + print("text region early 2 marginal in %.1fs", time.time() - t0) + ## birdan sora chock chakir t1 = time.time() if not self.full_layout: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) @@ -4807,7 +4852,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - #print("text region early 2 in %.1fs", time.time() - t0) + print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -4929,7 +4974,7 @@ class Eynollah: else: pass - #print("text region early 3 in %.1fs", time.time() - t0) + 
print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) @@ -4938,14 +4983,17 @@ class Eynollah: #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - #print("text region early 4 in %.1fs", time.time() - t0) + print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - #print("text region early 5 in %.1fs", time.time() - t0) + print("text region early 5 in %.1fs", time.time() - t0) + ## birdan sora chock chakir if not self.curved_line: if self.light_version: if self.textline_light: - slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) + + slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con = self.get_slopes_and_deskew_new_light2(txt_con_org, contours_only_text_parent, textline_mask_tot_ea_org, image_page_rotated, boxes_text, slope_deskew) slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _ = self.get_slopes_and_deskew_new_light(polygons_of_marginals, polygons_of_marginals, textline_mask_tot_ea_org, image_page_rotated, boxes_marginals, slope_deskew) #slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con = self.delete_regions_without_textlines(slopes, all_found_textline_polygons, boxes_text, txt_con_org, contours_only_text_parent, index_by_text_par_con) @@ -4974,7 +5022,7 @@ class Eynollah: all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - #print("text region early 6 in %.1fs", time.time() - t0) + print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) @@ -5134,7 +5182,7 @@ class Eynollah: self.logger.info("Job done in %.1fs", 
time.time() - t0) if not self.dir_in: return pcgts - #print("text region early 7 in %.1fs", time.time() - t0) + print("text region early 7 in %.1fs", time.time() - t0) else: _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) diff --git a/qurator/eynollah/utils/marginals.py b/qurator/eynollah/utils/marginals.py index 7c43de6..984156f 100644 --- a/qurator/eynollah/utils/marginals.py +++ b/qurator/eynollah/utils/marginals.py @@ -8,7 +8,7 @@ from .contour import find_new_features_of_contours, return_contours_of_intereste from .resize import resize_image from .rotate import rotate_image -def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None): +def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, light_version=False, kernel=None): mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1])) mask_marginals=mask_marginals.astype(np.uint8) @@ -49,27 +49,14 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N if thickness_along_y_percent>=14: text_with_lines_y_rev=-1*text_with_lines_y[:] - #print(text_with_lines_y) - #print(text_with_lines_y_rev) - - - - - #plt.plot(text_with_lines_y) - #plt.show() - text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev) - #plt.plot(text_with_lines_y_rev) - #plt.show() sigma_gaus=1 region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus) region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus) - #plt.plot(region_sum_0_rev) - #plt.show() region_sum_0_updown=region_sum_0[len(region_sum_0)::-1] first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None)) @@ -78,43 +65,17 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N last_nonzero=len(region_sum_0)-last_nonzero - ##img_sum_0_smooth_rev=-region_sum_0 - - mid_point=(last_nonzero+first_nonzero)/2. 
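The surrounding get_marginals code, which this patch strips of its commented-out plotting, finds marginalia from a vertical projection profile: sum the text mask over the page height, smooth it with a 1-D Gaussian, and take valleys of the profile (peaks of its inversion) on either side of the page midpoint as candidate boundaries between marginal columns and the main text block. A condensed sketch of that analysis, assuming a non-empty 2-D binary text mask; the names are illustrative, not the eynollah API:

    import numpy as np
    from scipy.ndimage import gaussian_filter1d
    from scipy.signal import find_peaks

    def marginal_boundary_candidates(mask, sigma=1.0):
        # Text density per image column, smoothed.
        profile = gaussian_filter1d(mask.sum(axis=0).astype(float), sigma)
        inverted = profile.max() - profile        # valleys become peaks
        peaks, _ = find_peaks(inverted, height=0)
        nonzero = np.nonzero(profile)[0]          # assumes some text exists
        first_nz, last_nz = nonzero[0], nonzero[-1]
        mid_point = (first_nz + last_nz) / 2.0
        peaks = peaks[(peaks > first_nz) & (peaks < last_nz)]
        left = peaks[peaks < mid_point]           # left-marginal candidates
        right = peaks[peaks > mid_point]          # right-marginal candidates
        return left, right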
one_third_right=(last_nonzero-mid_point)/3.0 one_third_left=(mid_point-first_nonzero)/3.0 - #img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev) - - - - peaks, _ = find_peaks(text_with_lines_y_rev, height=0) - - peaks=np.array(peaks) - - - #print(region_sum_0[peaks]) - ##plt.plot(region_sum_0) - ##plt.plot(peaks,region_sum_0[peaks],'*') - ##plt.show() - #print(first_nonzero,last_nonzero,peaks) peaks=peaks[(peaks>first_nonzero) & ((peaksmid_point] @@ -137,9 +98,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N - - #print(point_left,point_right) - #print(text_regions.shape) if point_right>=mask_marginals.shape[1]: point_right=mask_marginals.shape[1]-1 @@ -148,10 +106,8 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N except: mask_marginals[:,:]=1 - #print(mask_marginals.shape,point_left,point_right,'nadosh') mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew) - #print(mask_marginals_rotated.shape,'nadosh') mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0) mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1 @@ -168,11 +124,6 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N max_point_of_right_marginal=text_regions.shape[1]-1 - #print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew') - #print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated') - #plt.imshow(mask_marginals) - #plt.show() - #plt.imshow(mask_marginals_rotated) #plt.show() @@ -195,10 +146,9 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N x_min_marginals_right=[] for i in range(len(cx_text_only)): - x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i]) y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i]) - #print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar') + if x_width_mar>16 and y_height_mar/x_width_mar<18: marginlas_should_be_main_text.append(polygons_of_marginals[i]) if x_min_text_only[i]<(mid_point-one_third_left): @@ -220,18 +170,13 @@ def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=N x_min_marginals_right=[text_regions.shape[1]-1] - - - #print(x_min_marginals_left[0],x_min_marginals_right[0],'margo') - - #print(marginlas_should_be_main_text,'marginlas_should_be_main_text') text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4)) - #print(np.unique(text_regions)) #text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0 #text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0 - + + text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0 text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0 From f93fa12441104324ee8e7ced0488b44827704de3 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 18 Oct 2024 09:14:42 +0200 Subject: [PATCH 43/50] doing more multiprocessing in order to make the process faster --- qurator/eynollah/eynollah.py | 92 +++--- qurator/eynollah/utils/__init__.py | 93 +----- qurator/eynollah/utils/contour.py | 73 ++++- qurator/eynollah/utils/separate_lines.py | 386 +++++++++++++++++------ 4 files changed, 407 insertions(+), 237 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index fd66b81..79724cc 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ 
-2253,7 +2253,7 @@ class Eynollah: else: prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - print("inside bin ", time.time()-t_bin) + #print("inside bin ", time.time()-t_bin) prediction_bin=prediction_bin[:,:,0] prediction_bin = (prediction_bin[:,:]==0)*1 prediction_bin = prediction_bin*255 @@ -2266,7 +2266,7 @@ class Eynollah: else: img_bin = np.copy(img_resized) - print("inside 1 ", time.time()-t_in) + #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) textline_mask_tot_ea = self.run_textline(img_resized, num_col_classifier) @@ -2281,7 +2281,7 @@ class Eynollah: #plt.imshwo(self.image_page_org_size) #plt.show() if not skip_layout_and_reading_order: - print("inside 2 ", time.time()-t_in) + #print("inside 2 ", time.time()-t_in) if not self.dir_in: if num_col_classifier == 1 or num_col_classifier >= 2: @@ -2309,7 +2309,7 @@ class Eynollah: prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) - print("inside 3 ", time.time()-t_in) + #print("inside 3 ", time.time()-t_in) #plt.imshow(prediction_regions_org[:,:,0]) #plt.show() @@ -2395,7 +2395,7 @@ class Eynollah: #plt.imshow(textline_mask_tot_ea) #plt.show() - print("inside 4 ", time.time()-t_in) + #print("inside 4 ", time.time()-t_in) return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin else: img_bin = resize_image(img_bin,img_height_h, img_width_h ) @@ -3368,7 +3368,7 @@ class Eynollah: if self.tables: regions_without_separators_d[table_prediction_n[:,:] == 1] = 1 regions_without_separators = (text_regions_p[:, :] == 1) * 1 # ( (text_regions_p[:,:]==1) | (text_regions_p[:,:]==2) )*1 #self.return_regions_without_separators_new(text_regions_p[:,:,0],img_only_regions) - print(time.time()-t_0_box,'time box in 1') + #print(time.time()-t_0_box,'time box in 1') if self.tables: regions_without_separators[table_prediction ==1 ] = 1 if np.abs(slope_deskew) < SLOPE_THRESHOLD: @@ -3381,7 +3381,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: _, _, matrix_of_lines_ch_d, splitter_y_new_d, _ = find_number_of_columns_in_document(np.repeat(text_regions_p_1_n[:, :, np.newaxis], 3, axis=2), num_col_classifier, self.tables, pixel_lines) - print(time.time()-t_0_box,'time box in 2') + #print(time.time()-t_0_box,'time box in 2') self.logger.info("num_col_classifier: %s", num_col_classifier) if num_col_classifier >= 3: @@ -3391,36 +3391,41 @@ class Eynollah: else: regions_without_separators_d = regions_without_separators_d.astype(np.uint8) regions_without_separators_d = cv2.erode(regions_without_separators_d[:, :], KERNEL, iterations=6) - print(time.time()-t_0_box,'time box in 3') + #print(time.time()-t_0_box,'time box in 3') t1 = time.time() if np.abs(slope_deskew) < SLOPE_THRESHOLD: boxes, peaks_neg_tot_tables = return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_without_separators, matrix_of_lines_ch, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes_d = None self.logger.debug("len(boxes): %s", len(boxes)) + #print(time.time()-t_0_box,'time box in 3.1') - text_regions_p_tables = np.copy(text_regions_p) - text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, 
splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) - img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + if self.tables: + text_regions_p_tables = np.copy(text_regions_p) + text_regions_p_tables[:,:][(table_prediction[:,:] == 1)] = 10 + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables, boxes, 0, splitter_y_new, peaks_neg_tot_tables, text_regions_p_tables , num_col_classifier , 0.000005, pixel_line) + #print(time.time()-t_0_box,'time box in 3.2') + img_revised_tab2, contoures_tables = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction, 10, num_col_classifier) + #print(time.time()-t_0_box,'time box in 3.3') else: boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left) boxes = None self.logger.debug("len(boxes): %s", len(boxes_d)) - text_regions_p_tables = np.copy(text_regions_p_1_n) - text_regions_p_tables =np.round(text_regions_p_tables) - text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 - - pixel_line = 3 - img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) - img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) - - img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) - img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) - img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) - img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) - print(time.time()-t_0_box,'time box in 4') + if self.tables: + text_regions_p_tables = np.copy(text_regions_p_1_n) + text_regions_p_tables =np.round(text_regions_p_tables) + text_regions_p_tables[:,:][(text_regions_p_tables[:,:] != 3) & (table_prediction_n[:,:] == 1)] = 10 + + pixel_line = 3 + img_revised_tab2 = self.add_tables_heuristic_to_layout(text_regions_p_tables,boxes_d,0,splitter_y_new_d,peaks_neg_tot_tables_d,text_regions_p_tables, num_col_classifier, 0.000005, pixel_line) + img_revised_tab2_d,_ = self.check_iou_of_bounding_box_and_contour_for_tables(img_revised_tab2,table_prediction_n, 10, num_col_classifier) + + img_revised_tab2_d_rotated = rotate_image(img_revised_tab2_d, -slope_deskew) + img_revised_tab2_d_rotated = np.round(img_revised_tab2_d_rotated) + img_revised_tab2_d_rotated = img_revised_tab2_d_rotated.astype(np.int8) + img_revised_tab2_d_rotated = resize_image(img_revised_tab2_d_rotated, text_regions_p.shape[0], text_regions_p.shape[1]) + #print(time.time()-t_0_box,'time box in 4') self.logger.info("detecting boxes took %.1fs", time.time() - t1) if self.tables: @@ -3452,7 +3457,7 @@ class Eynollah: pixel_img = 10 contours_tables = return_contours_of_interested_region(text_regions_p, pixel_img, min_area_mar) - print(time.time()-t_0_box,'time box in 5') + #print(time.time()-t_0_box,'time box in 5') self.logger.debug('exit run_boxes_no_full_layout') return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, 
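# A short sketch of the coordinate round-trip performed above for the
# deskewed branch: a table label map produced on the rotated page is rotated
# back by -slope_deskew, re-quantized to integer class ids, and resized to
# the original page shape. The two helpers below are simplified stand-ins for
# rotate_image/resize_image from qurator/eynollah/utils, not the real ones.
import cv2
import numpy as np

def rotate_image_sketch(img, angle_deg):
    h, w = img.shape[:2]
    m = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), angle_deg, 1.0)
    return cv2.warpAffine(img, m, (w, h))

def resize_image_sketch(img, height, width):
    return cv2.resize(img, (width, height), interpolation=cv2.INTER_NEAREST)

slope_deskew = 2.5                                   # illustrative angle
tab_map_deskewed = np.zeros((600, 400), dtype=np.float32)
tab_map_deskewed[100:200, 50:350] = 10               # the table label used above

tab_map = rotate_image_sketch(tab_map_deskewed, -slope_deskew)
tab_map = np.round(tab_map).astype(np.uint8)         # interpolation noise -> class ids
tab_map = resize_image_sketch(tab_map, 660, 440)     # back to the original page size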
boxes, boxes_d, polygons_of_marginals, contours_tables @@ -4742,16 +4747,16 @@ class Eynollah: t0 = time.time() if self.dir_in: self.reset_file_name_dir(os.path.join(self.dir_in,img_name)) - print("text region early -11 in %.1fs", time.time() - t0) + #print("text region early -11 in %.1fs", time.time() - t0) img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version) self.logger.info("Enhancing took %.1fs ", time.time() - t0) - print("text region early -1 in %.1fs", time.time() - t0) + #print("text region early -1 in %.1fs", time.time() - t0) t1 = time.time() if not self.skip_layout_and_reading_order: if self.light_version: text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier) - print("text region early -2 in %.1fs", time.time() - t0) + #print("text region early -2 in %.1fs", time.time() - t0) if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1: @@ -4764,17 +4769,17 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) - print("text region early -2,5 in %.1fs", time.time() - t0) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ self.run_graphics_and_columns_light(text_regions_p_1, textline_mask_tot_ea, num_col_classifier, num_column_is_classified, erosion_hurts, img_bin_light) #self.logger.info("run graphics %.1fs ", time.time() - t1t) - print("text region early -3 in %.1fs", time.time() - t0) + #print("text region early -3 in %.1fs", time.time() - t0) textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea) - print("text region early -4 in %.1fs", time.time() - t0) + #print("text region early -4 in %.1fs", time.time() - t0) else: text_regions_p_1 ,erosion_hurts, polygons_lines_xml = self.get_regions_from_xy_2models(img_res, is_image_enhanced, num_col_classifier) self.logger.info("Textregion detection took %.1fs ", time.time() - t1) @@ -4795,7 +4800,7 @@ class Eynollah: continue else: return pcgts - print("text region early in %.1fs", time.time() - t0) + #print("text region early in %.1fs", time.time() - t0) t1 = time.time() if not self.light_version: textline_mask_tot_ea = self.run_textline(image_page) @@ -4837,7 +4842,7 @@ class Eynollah: image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m ) self.logger.info("detection of marginals took %.1fs", time.time() - t1) - print("text region early 2 marginal in %.1fs", time.time() - t0) + #print("text region early 2 marginal in %.1fs", time.time() - t0) ## birdan sora chock chakir t1 = time.time() if not self.full_layout: @@ -4852,7 +4857,7 @@ class Eynollah: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 - print("text region early 2 in %.1fs", time.time() - t0) + #print("text region early 2 in %.1fs", time.time() - t0) ###min_con_area = 0.000005 if np.abs(slope_deskew) >= 
SLOPE_THRESHOLD: contours_only_text, hir_on_text = return_contours_of_image(text_only) @@ -4974,19 +4979,20 @@ class Eynollah: else: pass - print("text region early 3 in %.1fs", time.time() - t0) + #print("text region early 3 in %.1fs", time.time() - t0) if self.light_version: contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) contours_only_text_parent = self.filter_contours_inside_a_bigger_one(contours_only_text_parent, text_only, marginal_cnts=polygons_of_marginals) + #print("text region early 3.5 in %.1fs", time.time() - t0) txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first) #txt_con_org = self.dilate_textregions_contours(txt_con_org) #contours_only_text_parent = self.dilate_textregions_contours(contours_only_text_parent) else: txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first) - print("text region early 4 in %.1fs", time.time() - t0) + #print("text region early 4 in %.1fs", time.time() - t0) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) - print("text region early 5 in %.1fs", time.time() - t0) + #print("text region early 5 in %.1fs", time.time() - t0) ## birdan sora chock chakir if not self.curved_line: if self.light_version: @@ -5022,7 +5028,7 @@ class Eynollah: all_found_textline_polygons = small_textlines_to_parent_adherence2(all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_textline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, _, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_textline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_textline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) - print("text region early 6 in %.1fs", time.time() - t0) + #print("text region early 6 in %.1fs", time.time() - t0) if self.full_layout: if np.abs(slope_deskew) >= SLOPE_THRESHOLD: contours_only_text_parent_d_ordered = self.return_list_of_contours_with_desired_order(contours_only_text_parent_d_ordered, index_by_text_par_con) @@ -5182,7 +5188,7 @@ class Eynollah: self.logger.info("Job done in %.1fs", time.time() - t0) if not self.dir_in: return pcgts - print("text region early 7 in %.1fs", time.time() - t0) + #print("text region early 7 in %.1fs", time.time() - t0) else: _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_and_reading_order=self.skip_layout_and_reading_order) diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index 8705ecf..6219df2 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -7,7 +7,7 @@ import cv2 import imutils from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d - +import time from .is_nan import isNaN from .contour import (contours_in_same_horizon, find_new_features_of_contours, @@ -1342,7 +1342,7 @@ def return_points_with_boundies(peaks_neg_fin, first_point, last_point): return peaks_neg_tot def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, pixel_lines, contours_h=None): - + 
t_ins_c0 = time.time() separators_closeup=( (region_pre_p[:,:,:]==pixel_lines))*1 separators_closeup[0:110,:,:]=0 @@ -1356,84 +1356,47 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, separators_closeup_new=np.zeros((separators_closeup.shape[0] ,separators_closeup.shape[1] )) - - - - ##_,separators_closeup_n=self.combine_hor_lines_and_delete_cross_points_and_get_lines_features_back(region_pre_p[:,:,0]) separators_closeup_n=np.copy(separators_closeup) separators_closeup_n=separators_closeup_n.astype(np.uint8) - ##plt.imshow(separators_closeup_n[:,:,0]) - ##plt.show() separators_closeup_n_binary=np.zeros(( separators_closeup_n.shape[0],separators_closeup_n.shape[1]) ) separators_closeup_n_binary[:,:]=separators_closeup_n[:,:,0] separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]!=0]=1 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==0]=255 - #separators_closeup_n_binary[:,:][separators_closeup_n_binary[:,:]==-255]=0 - - - #separators_closeup_n_binary=(separators_closeup_n_binary[:,:]==2)*1 - - #gray = cv2.cvtColor(separators_closeup_n, cv2.COLOR_BGR2GRAY) - - ### - - #print(separators_closeup_n_binary.shape) + gray_early=np.repeat(separators_closeup_n_binary[:, :, np.newaxis], 3, axis=2) gray_early=gray_early.astype(np.uint8) - #print(gray_early.shape,'burda') imgray_e = cv2.cvtColor(gray_early, cv2.COLOR_BGR2GRAY) - #print('burda2') ret_e, thresh_e = cv2.threshold(imgray_e, 0, 255, 0) - #print('burda3') contours_line_e,hierarchy_e=cv2.findContours(thresh_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) - #slope_lines_e,dist_x_e, x_min_main_e ,x_max_main_e ,cy_main_e,slope_lines_org_e,y_min_main_e, y_max_main_e, cx_main_e=self.find_features_of_lines(contours_line_e) - slope_linese,dist_xe, x_min_maine ,x_max_maine ,cy_maine,slope_lines_orge,y_min_maine, y_max_maine, cx_maine=find_features_of_lines(contours_line_e) dist_ye=y_max_maine-y_min_maine - #print(y_max_maine-y_min_maine,'y') - #print(dist_xe,'x') args_e=np.array(range(len(contours_line_e))) args_hor_e=args_e[(dist_ye<=50) & (dist_xe>=3*dist_ye)] - #print(args_hor_e,'jidi',len(args_hor_e),'jilva') cnts_hor_e=[] for ce in args_hor_e: cnts_hor_e.append(contours_line_e[ce]) - #print(len(slope_linese),'lieee') figs_e=np.zeros(thresh_e.shape) figs_e=cv2.fillPoly(figs_e,pts=cnts_hor_e,color=(1,1,1)) - #plt.imshow(figs_e) - #plt.show() - - ### - separators_closeup_n_binary=cv2.fillPoly(separators_closeup_n_binary,pts=cnts_hor_e,color=(0,0,0)) gray = cv2.bitwise_not(separators_closeup_n_binary) gray=gray.astype(np.uint8) - - #plt.imshow(gray) - #plt.show() - - bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, \ cv2.THRESH_BINARY, 15, -2) - ##plt.imshow(bw[:,:]) - ##plt.show() - + horizontal = np.copy(bw) vertical = np.copy(bw) @@ -1451,16 +1414,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, horizontal = cv2.dilate(horizontal,kernel,iterations = 2) horizontal = cv2.erode(horizontal,kernel,iterations = 2) - - ### - #print(np.unique(horizontal),'uni') horizontal=cv2.fillPoly(horizontal,pts=cnts_hor_e,color=(255,255,255)) - ### - - - - #plt.imshow(horizontal) - #plt.show() rows = vertical.shape[0] verticalsize = rows // 30 @@ -1471,35 +1425,21 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, vertical = cv2.dilate(vertical, verticalStructure) vertical = cv2.dilate(vertical,kernel,iterations = 1) - # Show extracted vertical lines 
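# The morphology above follows the classic OpenCV recipe for isolating
# straight separator rules: erode and then dilate with a long, thin
# structuring element so that only runs aligned with the kernel survive.
# A self-contained sketch (the toy `bw` mask stands in for the inverted
# separator image built from separators_closeup_n_binary):
import cv2
import numpy as np

bw = np.zeros((300, 300), dtype=np.uint8)
bw[150, 20:280] = 255            # one horizontal rule
bw[40:260, 150] = 255            # one vertical rule

horizontal = np.copy(bw)
vertical = np.copy(bw)

horizontal_size = horizontal.shape[1] // 30
horizontal_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_size, 1))
horizontal = cv2.erode(horizontal, horizontal_structure)
horizontal = cv2.dilate(horizontal, horizontal_structure)   # the vertical run is gone

vertical_size = vertical.shape[0] // 30
vertical_structure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, vertical_size))
vertical = cv2.erode(vertical, vertical_structure)
vertical = cv2.dilate(vertical, vertical_structure)         # the horizontal run is gone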
horizontal,special_separators=combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(vertical,horizontal,num_col_classifier) - - #plt.imshow(horizontal) - #plt.show() - #print(vertical.shape,np.unique(vertical),'verticalvertical') separators_closeup_new[:,:][vertical[:,:]!=0]=1 separators_closeup_new[:,:][horizontal[:,:]!=0]=1 - ##plt.imshow(separators_closeup_new) - ##plt.show() - ##separators_closeup_n vertical=np.repeat(vertical[:, :, np.newaxis], 3, axis=2) vertical=vertical.astype(np.uint8) - ##plt.plot(vertical[:,:,0].sum(axis=0)) - ##plt.show() - - #plt.plot(vertical[:,:,0].sum(axis=1)) - #plt.show() - imgray = cv2.cvtColor(vertical, cv2.COLOR_BGR2GRAY) ret, thresh = cv2.threshold(imgray, 0, 255, 0) contours_line_vers,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE) slope_lines,dist_x, x_min_main ,x_max_main ,cy_main,slope_lines_org,y_min_main, y_max_main, cx_main=find_features_of_lines(contours_line_vers) - #print(slope_lines,'vertical') + args=np.array( range(len(slope_lines) )) args_ver=args[slope_lines==1] dist_x_ver=dist_x[slope_lines==1] @@ -1512,9 +1452,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, len_y=separators_closeup.shape[0]/3.0 - #plt.imshow(horizontal) - #plt.show() - horizontal=np.repeat(horizontal[:, :, np.newaxis], 3, axis=2) horizontal=horizontal.astype(np.uint8) imgray = cv2.cvtColor(horizontal, cv2.COLOR_BGR2GRAY) @@ -1582,8 +1519,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, matrix_of_lines_ch[len(cy_main_hor):,9]=1 - - if contours_h is not None: slope_lines_head,dist_x_head, x_min_main_head ,x_max_main_head ,cy_main_head,slope_lines_org_head,y_min_main_head, y_max_main_head, cx_main_head=find_features_of_lines(contours_h) matrix_l_n=np.zeros((matrix_of_lines_ch.shape[0]+len(cy_main_head),matrix_of_lines_ch.shape[1])) @@ -1629,8 +1564,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, args_big_parts=np.array(range(len(splitter_y_new_diff))) [ splitter_y_new_diff>22 ] - - regions_without_separators=return_regions_without_separators(region_pre_p) @@ -1640,19 +1573,8 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin_fin=[] for itiles in args_big_parts: - - regions_without_separators_tile=regions_without_separators[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:,0] - #image_page_background_zero_tile=image_page_background_zero[int(splitter_y_new[itiles]):int(splitter_y_new[itiles+1]),:] - - #print(regions_without_separators_tile.shape) - ##plt.imshow(regions_without_separators_tile) - ##plt.show() - - #num_col, peaks_neg_fin=self.find_num_col(regions_without_separators_tile,multiplier=6.0) - - #regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3) - # + try: num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0) except: @@ -1670,9 +1592,6 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables, peaks_neg_fin=peaks_neg_fin[peaks_neg_fin<=(vertical.shape[1]-500)] peaks_neg_fin_fin=peaks_neg_fin[:] - #print(peaks_neg_fin_fin,'peaks_neg_fin_fintaza') - - return num_col_fin, peaks_neg_fin_fin,matrix_of_lines_ch,splitter_y_new,separators_closeup_n diff --git a/qurator/eynollah/utils/contour.py b/qurator/eynollah/utils/contour.py index 53b39b5..8a92ace 100644 --- a/qurator/eynollah/utils/contour.py +++ 
b/qurator/eynollah/utils/contour.py @@ -263,7 +263,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first): return cnts_org -def get_textregion_contours_in_org_image_light(cnts, img, slope_first): +def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): h_o = img.shape[0] w_o = img.shape[1] @@ -278,14 +278,7 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): img_copy = np.zeros(img.shape) img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1)) - # plt.imshow(img_copy) - # plt.show() - - # print(img.shape,'img') img_copy = rotation_image_new(img_copy, -slope_first) - ##print(img_copy.shape,'img_copy') - # plt.imshow(img_copy) - # plt.show() img_copy = img_copy.astype(np.uint8) imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) @@ -300,6 +293,70 @@ def get_textregion_contours_in_org_image_light(cnts, img, slope_first): return cnts_org +def return_list_of_contours_with_desired_order(ls_cons, sorted_indexes): + return [ls_cons[sorted_indexes[index]] for index in range(len(sorted_indexes))] +def do_back_rotation_and_get_cnt_back(queue_of_all_params, contours_par_per_process,indexes_r_con_per_pro, img, slope_first): + contours_textregion_per_each_subprocess = [] + index_by_text_region_contours = [] + for mv in range(len(contours_par_per_process)): + img_copy = np.zeros(img.shape) + img_copy = cv2.fillPoly(img_copy, pts=[contours_par_per_process[mv]], color=(1, 1, 1)) + + img_copy = rotation_image_new(img_copy, -slope_first) + + img_copy = img_copy.astype(np.uint8) + imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY) + ret, thresh = cv2.threshold(imgray, 0, 255, 0) + + cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + + cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1]) + cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) + # print(np.shape(cont_int[0])) + contours_textregion_per_each_subprocess.append(cont_int[0]*6) + index_by_text_region_contours.append(indexes_r_con_per_pro[mv]) + + queue_of_all_params.put([contours_textregion_per_each_subprocess, index_by_text_region_contours]) + +def get_textregion_contours_in_org_image_light(cnts, img, slope_first): + num_cores = cpu_count() + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(cnts), num_cores + 1) + indexes_by_text_con = np.array(range(len(cnts))) + + h_o = img.shape[0] + w_o = img.shape[1] + + img = cv2.resize(img, (int(img.shape[1]/6.), int(img.shape[0]/6.)), interpolation=cv2.INTER_NEAREST) + ##cnts = list( (np.array(cnts)/2).astype(np.int16) ) + #cnts = cnts/2 + cnts = [(i/ 6).astype(np.int32) for i in cnts] + + for i in range(num_cores): + contours_par_per_process = cnts[int(nh[i]) : int(nh[i + 1])] + indexes_text_con_per_process = indexes_by_text_con[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_back_rotation_and_get_cnt_back, args=(queue_of_all_params, contours_par_per_process, indexes_text_con_per_process, img, slope_first))) + + for i in range(num_cores): + processes[i].start() + + cnts_org = [] + all_index_text_con = [] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + contours_for_subprocess = list_all_par[0] + indexes_for_subprocess = list_all_par[1] + for j in range(len(contours_for_subprocess)): + cnts_org.append(contours_for_subprocess[j]) + all_index_text_con.append(indexes_for_subprocess[j]) + for i in range(num_cores): + processes[i].join() + + cnts_org = 
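# The parallel pattern this commit introduces (in
# get_textregion_contours_in_org_image_light and again below for deskewing),
# reduced to a runnable sketch: split the work items into one chunk per core,
# have each worker push its results together with the original item indexes
# through a Queue, drain the Queue before joining, and restore the original
# order at the end. The squaring worker is only a stand-in for the real
# per-contour back-rotation; note the real code additionally shrinks the
# image and contours 6x before rotating and scales the recovered contours
# back up, to cut the per-contour cost.
import numpy as np
from multiprocessing import Process, Queue, cpu_count

def worker(queue, items, indexes):
    results = [i * i for i in items]           # stand-in for the real work
    queue.put([results, list(indexes)])

def run_in_parallel(items):
    num_cores = cpu_count()
    queue = Queue()
    nh = np.linspace(0, len(items), num_cores + 1)
    indexes = np.arange(len(items))
    processes = [Process(target=worker,
                         args=(queue,
                               items[int(nh[i]):int(nh[i + 1])],
                               indexes[int(nh[i]):int(nh[i + 1])]))
                 for i in range(num_cores)]
    for p in processes:
        p.start()
    results, order = [], []
    for _ in range(num_cores):                 # drain before join: a child may
        res, idx = queue.get(True)             # block on put() if we join first
        results += res
        order += idx
    for p in processes:
        p.join()
    # chunks arrive in completion order, so restore the original ordering
    return [results[order.index(i)] for i in range(len(items))]

if __name__ == "__main__":
    print(run_in_parallel(list(range(10))))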
return_list_of_contours_with_desired_order(cnts_org, all_index_text_con) + + return cnts_org + def return_contours_of_interested_textline(region_pre_p, pixel): # pixels of images are identified by 5 diff --git a/qurator/eynollah/utils/separate_lines.py b/qurator/eynollah/utils/separate_lines.py index 1004a92..f8df33f 100644 --- a/qurator/eynollah/utils/separate_lines.py +++ b/qurator/eynollah/utils/separate_lines.py @@ -3,7 +3,8 @@ import cv2 from scipy.signal import find_peaks from scipy.ndimage import gaussian_filter1d import os - +from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Pool from .rotate import rotate_image from .contour import ( return_parent_contours, @@ -1569,8 +1570,21 @@ def separate_lines_new2(img_path, thetha, num_col, slope_region, plotter=None): # plt.show() return img_patch_ineterst_revised -def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): +def do_image_rotation(queue_of_all_params,angels_per_process, img_resized, sigma_des): + angels_per_each_subprocess = [] + for mv in range(len(angels_per_process)): + img_rot=rotate_image(img_resized,angels_per_process[mv]) + img_rot[img_rot!=0]=1 + try: + var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + except: + var_spectrum=0 + angels_per_each_subprocess.append(var_spectrum) + + queue_of_all_params.put([angels_per_each_subprocess]) +def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=False, plotter=None): + num_cores = cpu_count() if main_page and plotter: plotter.save_plot_of_textline_density(img_patch_org) @@ -1603,22 +1617,44 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angels=np.array([-45, 0 , 45 , 90 , ])#np.linspace(-12,12,100)#np.array([0 , 45 , 90 , -45]) - + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - ##print(rot,var_spectrum,'var_spectrum') - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ###img_rot[img_rot!=0]=1 + ####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(var_spectrum,'var_spectrum') + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + #####print(rot,var_spectrum,'var_spectrum') + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) + + + try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1628,17 +1664,38 @@ def 
return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals angels=np.linspace(ang_int-22.5,ang_int+22.5,n_tot_angles) + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ##var_res=[] + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ##img_rot[img_rot!=0]=1 + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##except: + ##var_spectrum=0 + ##var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1650,24 +1707,46 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals #plt.imshow(img_resized) #plt.show() angels=np.linspace(-12,12,n_tot_angles)#np.array([0 , 45 , 90 , -45]) + + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() - var_res=[] + ##var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ###plt.imshow(img_rot) + ###plt.show() + ##img_rot[img_rot!=0]=1 + ###neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + ###print(var_spectrum,'var_spectrum') + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 + ##except: + ##var_spectrum=0 - var_res.append(var_spectrum) + ##var_res.append(var_spectrum) if plotter: @@ -1681,17 +1760,38 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals early_slope_edge=11 if abs(ang_int)>early_slope_edge and ang_int<0: angels=np.linspace(-90,-12,n_tot_angles) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + 
processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + ##var_res=[] + ##for rot in angels: + ##img_rot=rotate_image(img_resized,rot) + ####plt.imshow(img_rot) + ####plt.show() + ##img_rot[img_rot!=0]=1 + ##try: + ##var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ##except: + ##var_spectrum=0 + ##var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1701,18 +1801,41 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals elif abs(ang_int)>early_slope_edge and ang_int>0: angels=np.linspace(90,12,n_tot_angles) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + + ###var_res=[] + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(indexer,'indexer') + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1720,20 +1843,42 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals ang_int=0 else: angels=np.linspace(-25,25,int(n_tot_angles/2.)+10) - var_res=[] indexer=0 - for rot in angels: - img_rot=rotate_image(img_resized,rot) - #plt.imshow(img_rot) - #plt.show() - img_rot[img_rot!=0]=1 - #neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(var_spectrum,'var_spectrum') - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, 
args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + ####var_res=[] + + ####for rot in angels: + ####img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ####img_rot[img_rot!=0]=1 + #####neg_peaks,var_spectrum=self.find_num_col_deskew(img_rot,sigma_des,20.3 ) + #####print(var_spectrum,'var_spectrum') + ####try: + ####var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####except: + ####var_spectrum=0 + ####var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] @@ -1750,19 +1895,40 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals if abs(ang_int)>early_slope_edge and ang_int<0: angels=np.linspace(-90,-25,int(n_tot_angles/2.)+10) - + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] - - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - except: - var_spectrum=0 - var_res.append(var_spectrum) + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() + + ###var_res=[] + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ###except: + ###var_spectrum=0 + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) @@ -1773,22 +1939,44 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, main_page=Fals elif abs(ang_int)>early_slope_edge and ang_int>0: angels=np.linspace(90,25,int(n_tot_angles/2.)+10) - + indexer=0 + + queue_of_all_params = Queue() + processes = [] + nh = np.linspace(0, len(angels), num_cores + 1) + + for i in range(num_cores): + angels_per_process = angels[int(nh[i]) : int(nh[i + 1])] + processes.append(Process(target=do_image_rotation, args=(queue_of_all_params, angels_per_process, img_resized, sigma_des))) + + for i in range(num_cores): + processes[i].start() + var_res=[] + for i in range(num_cores): + list_all_par = queue_of_all_params.get(True) + angles_for_subprocess = list_all_par[0] + for j in range(len(angles_for_subprocess)): + var_res.append(angles_for_subprocess[j]) + + for i in range(num_cores): + processes[i].join() - indexer=0 - for rot in angels: - img_rot=rotate_image(img_resized,rot) - ##plt.imshow(img_rot) - ##plt.show() - img_rot[img_rot!=0]=1 - try: - var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) - #print(indexer,'indexer') - except: - var_spectrum=0 + ###var_res=[] - 
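# The same fan-out/collect block recurs above for every angle grid in
# return_deskew_slop; it could be factored into one helper, sketched below.
# One caveat worth noting: chunks come off the Queue in completion order, so
# this sketch tags each chunk with its index to keep the scores aligned with
# `angles` before taking the argmax. `toy_score` is only a stand-in for
# rotating the image and scoring it with find_num_col_deskew; the helper name
# is illustrative, not from the code base.
import numpy as np
from multiprocessing import Process, Queue, cpu_count

def score_chunk(queue, chunk_id, angles, score_fn):
    queue.put((chunk_id, [score_fn(a) for a in angles]))

def best_angle(angles, score_fn):
    num_cores = cpu_count()
    queue = Queue()
    nh = np.linspace(0, len(angles), num_cores + 1)
    processes = [Process(target=score_chunk,
                         args=(queue, i, angles[int(nh[i]):int(nh[i + 1])], score_fn))
                 for i in range(num_cores)]
    for p in processes:
        p.start()
    chunks = dict(queue.get(True) for _ in range(num_cores))
    for p in processes:
        p.join()
    scores = [s for i in range(num_cores) for s in chunks[i]]
    return angles[int(np.argmax(scores))] if scores else 0

def toy_score(angle):
    return -abs(angle - 3.0)     # pretends 3 degrees is the best deskew angle

if __name__ == "__main__":
    print(best_angle(np.linspace(-12, 12, 25), toy_score))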
var_res.append(var_spectrum) + + ###for rot in angels: + ###img_rot=rotate_image(img_resized,rot) + #####plt.imshow(img_rot) + #####plt.show() + ###img_rot[img_rot!=0]=1 + ###try: + ###var_spectrum=find_num_col_deskew(img_rot,sigma_des,20.3 ) + ####print(indexer,'indexer') + ###except: + ###var_spectrum=0 + + ###var_res.append(var_spectrum) try: var_res=np.array(var_res) ang_int=angels[np.argmax(var_res)]#angels_sorted[arg_final]#angels[arg_sort_early[arg_sort[arg_final]]]#angels[arg_fin] From 70772d41042df2415a0918d99f51cb183db36fe5 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 21 Oct 2024 23:46:38 +0200 Subject: [PATCH 44/50] binarization as a standalone command --- qurator/eynollah/cli.py | 33 +++ qurator/eynollah/eynollah.py | 5 +- qurator/eynollah/sbb_binarize.py | 383 +++++++++++++++++++++++++++++++ 3 files changed, 418 insertions(+), 3 deletions(-) create mode 100644 qurator/eynollah/sbb_binarize.py diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py index 4c762a8..0daf0c9 100644 --- a/qurator/eynollah/cli.py +++ b/qurator/eynollah/cli.py @@ -2,6 +2,7 @@ import sys import click from ocrd_utils import initLogging, setOverrideLogLevel from qurator.eynollah.eynollah import Eynollah +from qurator.eynollah.sbb_binarize import SbbBinarizer @click.group() def main(): @@ -48,6 +49,38 @@ def main(): def machine_based_reading_order(dir_xml, dir_out_modal_image, dir_out_classes, input_height, input_width, min_area_size): xml_files_ind = os.listdir(dir_xml) +@main.command() +@click.option('--patches/--no-patches', default=True, help='by enabling this parameter you let the model to see the image in patches.') + +@click.option('--model_dir', '-m', type=click.Path(exists=True, file_okay=False), required=True, help='directory containing models for prediction') + +@click.argument('input_image') + +@click.argument('output_image') +@click.option( + "--dir_in", + "-di", + help="directory of images", + type=click.Path(exists=True, file_okay=False), +) +@click.option( + "--dir_out", + "-do", + help="directory where the binarized images will be written", + type=click.Path(exists=True, file_okay=False), +) + +def binarization(patches, model_dir, input_image, output_image, dir_in, dir_out): + if not dir_out and (dir_in): + print("Error: You used -di but did not set -do") + sys.exit(1) + elif dir_out and not (dir_in): + print("Error: You used -do to write out binarized images but have not set -di") + sys.exit(1) + SbbBinarizer(model_dir).run(image_path=input_image, use_patches=patches, save=output_image, dir_in=dir_in, dir_out=dir_out) + + + @main.command() @click.option( diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 79724cc..e587ff3 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -240,7 +240,6 @@ class Eynollah: pcgts=pcgts) self.logger = logger if logger else getLogger('eynollah') self.dir_models = dir_models - self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425" self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425" self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" @@ -4769,9 +4768,9 @@ class Eynollah: textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) + slope_deskew, 
slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ diff --git a/qurator/eynollah/sbb_binarize.py b/qurator/eynollah/sbb_binarize.py new file mode 100644 index 0000000..36e9ab0 --- /dev/null +++ b/qurator/eynollah/sbb_binarize.py @@ -0,0 +1,383 @@ +""" +Tool to load model and binarize a given image. +""" + +import sys +from glob import glob +from os import environ, devnull +from os.path import join +from warnings import catch_warnings, simplefilter +import os + +import numpy as np +from PIL import Image +import cv2 +environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +stderr = sys.stderr +sys.stderr = open(devnull, 'w') +import tensorflow as tf +from tensorflow.keras.models import load_model +from tensorflow.python.keras import backend as tensorflow_backend +sys.stderr = stderr + + +import logging + +def resize_image(img_in, input_height, input_width): + return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST) + +class SbbBinarizer: + + def __init__(self, model_dir, logger=None): + self.model_dir = model_dir + self.log = logger if logger else logging.getLogger('SbbBinarizer') + + self.start_new_session() + + self.model_files = glob(self.model_dir+"/*/", recursive = True) + + self.models = [] + for model_file in self.model_files: + self.models.append(self.load_model(model_file)) + + def start_new_session(self): + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + + self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession() + tensorflow_backend.set_session(self.session) + + def end_session(self): + tensorflow_backend.clear_session() + self.session.close() + del self.session + + def load_model(self, model_name): + model = load_model(join(self.model_dir, model_name), compile=False) + model_height = model.layers[len(model.layers)-1].output_shape[1] + model_width = model.layers[len(model.layers)-1].output_shape[2] + n_classes = model.layers[len(model.layers)-1].output_shape[3] + return model, model_height, model_width, n_classes + + def predict(self, model_in, img, use_patches, n_batch_inference=5): + tensorflow_backend.set_session(self.session) + model, model_height, model_width, n_classes = model_in + + img_org_h = img.shape[0] + img_org_w = img.shape[1] + + if img.shape[0] < model_height and img.shape[1] >= model_width: + img_padded = np.zeros(( model_height, img.shape[1], img.shape[2] )) + + index_start_h = int( abs( img.shape[0] - model_height) /2.) + index_start_w = 0 + + img_padded [ index_start_h: index_start_h+img.shape[0], :, : ] = img[:,:,:] + + elif img.shape[0] >= model_height and img.shape[1] < model_width: + img_padded = np.zeros(( img.shape[0], model_width, img.shape[2] )) + + index_start_h = 0 + index_start_w = int( abs( img.shape[1] - model_width) /2.) + + img_padded [ :, index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] + + + elif img.shape[0] < model_height and img.shape[1] < model_width: + img_padded = np.zeros(( model_height, model_width, img.shape[2] )) + + index_start_h = int( abs( img.shape[0] - model_height) /2.) + index_start_w = int( abs( img.shape[1] - model_width) /2.) 
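# The padding branches above, and the tile-grid walk that predict() performs
# next, condensed into two helpers so the arithmetic is easier to follow.
# pad_to_model() center-pads small images up to the model input size and
# returns the offsets needed to crop the prediction back at the end;
# tile_boxes() reproduces the nxf/nyf grid with stride model_size - 2*margin
# and the clamping of the last row/column to the image border (the overlap of
# 2*margin is what later lets the stitching discard unreliable tile edges).
# Both are simplified sketches of the logic in predict(), not drop-in
# replacements.
import numpy as np

def pad_to_model(img, model_height, model_width):
    h, w = img.shape[:2]
    out_h, out_w = max(h, model_height), max(w, model_width)
    index_start_h = int(abs(h - out_h) / 2.0)
    index_start_w = int(abs(w - out_w) / 2.0)
    img_padded = np.zeros((out_h, out_w, img.shape[2]), dtype=img.dtype)
    img_padded[index_start_h:index_start_h + h, index_start_w:index_start_w + w, :] = img
    return img_padded, index_start_h, index_start_w

def tile_boxes(img_h, img_w, model_height, model_width, margin):
    width_mid = model_width - 2 * margin       # effective stride between tiles
    height_mid = model_height - 2 * margin
    nxf = int(np.ceil(img_w / float(width_mid)))
    nyf = int(np.ceil(img_h / float(height_mid)))
    boxes = []
    for i in range(nxf):
        for j in range(nyf):
            index_x_d, index_y_d = i * width_mid, j * height_mid
            index_x_u, index_y_u = index_x_d + model_width, index_y_d + model_height
            if index_x_u > img_w:              # clamp the last column of tiles
                index_x_u, index_x_d = img_w, img_w - model_width
            if index_y_u > img_h:              # clamp the last row of tiles
                index_y_u, index_y_d = img_h, img_h - model_height
            boxes.append((index_y_d, index_y_u, index_x_d, index_x_u))
    return boxes

img = np.zeros((300, 1000, 3), dtype=np.uint8)
padded, off_h, off_w = pad_to_model(img, 448, 448)
print(padded.shape, off_h, off_w)
print(tile_boxes(padded.shape[0], padded.shape[1], 448, 448, int(0.1 * 448))[:2])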
+ + img_padded [ index_start_h: index_start_h+img.shape[0], index_start_w: index_start_w+img.shape[1], : ] = img[:,:,:] + + else: + index_start_h = 0 + index_start_w = 0 + img_padded = np.copy(img) + + + img = np.copy(img_padded) + + + + if use_patches: + + margin = int(0.1 * model_width) + + width_mid = model_width - 2 * margin + height_mid = model_height - 2 * margin + + + img = img / float(255.0) + + img_h = img.shape[0] + img_w = img.shape[1] + + prediction_true = np.zeros((img_h, img_w, 3)) + mask_true = np.zeros((img_h, img_w)) + nxf = img_w / float(width_mid) + nyf = img_h / float(height_mid) + + if nxf > int(nxf): + nxf = int(nxf) + 1 + else: + nxf = int(nxf) + + if nyf > int(nyf): + nyf = int(nyf) + 1 + else: + nyf = int(nyf) + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + for i in range(nxf): + for j in range(nyf): + + if i == 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width + elif i > 0: + index_x_d = i * width_mid + index_x_u = index_x_d + model_width + + if j == 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height + elif j > 0: + index_y_d = j * height_mid + index_y_u = index_y_d + model_height + + if index_x_u > img_w: + index_x_u = img_w + index_x_d = img_w - model_width + if index_y_u > img_h: + index_y_u = img_h + index_y_d = img_h - model_height + + + list_i_s.append(i) + list_j_s.append(j) + list_x_u.append(index_x_u) + list_x_d.append(index_x_d) + list_y_d.append(index_y_d) + list_y_u.append(index_y_u) + + + img_patch[batch_indexer,:,:,:] = img[index_y_d:index_y_u, index_x_d:index_x_u, :] + + batch_indexer = batch_indexer + 1 + + + + if batch_indexer == n_batch_inference: + + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + #print(seg.shape, len(seg), len(list_i_s)) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : 
index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + elif i==(nxf-1) and j==(nyf-1): + label_p_pred = model.predict(img_patch,verbose=0) + + seg = np.argmax(label_p_pred, axis=3) + + #print(seg.shape, len(seg), len(list_i_s)) + + indexer_inside_batch = 0 + for i_batch, j_batch in zip(list_i_s, list_j_s): + seg_in = seg[indexer_inside_batch,:,:] + seg_color = np.repeat(seg_in[:, :, np.newaxis], 3, axis=2) + + index_y_u_in = list_y_u[indexer_inside_batch] + index_y_d_in = list_y_d[indexer_inside_batch] + + index_x_u_in = list_x_u[indexer_inside_batch] + index_x_d_in = list_x_d[indexer_inside_batch] + + if i_batch == 0 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = seg_color + elif i_batch == 0 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, 0 : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + 0 : index_x_u_in - margin, :] = seg_color + elif i_batch == nxf - 1 and j_batch != 0 and j_batch != nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - 0, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - 0, :] = 
seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == 0: + seg_color = seg_color[0 : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + 0 : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + elif i_batch != 0 and i_batch != nxf - 1 and j_batch == nyf - 1: + seg_color = seg_color[margin : seg_color.shape[0] - 0, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - 0, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + else: + seg_color = seg_color[margin : seg_color.shape[0] - margin, margin : seg_color.shape[1] - margin, :] + prediction_true[index_y_d_in + margin : index_y_u_in - margin, index_x_d_in + margin : index_x_u_in - margin, :] = seg_color + + indexer_inside_batch = indexer_inside_batch +1 + + + list_i_s = [] + list_j_s = [] + list_x_u = [] + list_x_d = [] + list_y_u = [] + list_y_d = [] + + batch_indexer = 0 + + img_patch = np.zeros((n_batch_inference, model_height, model_width,3)) + + + + prediction_true = prediction_true[index_start_h: index_start_h+img_org_h, index_start_w: index_start_w+img_org_w,:] + prediction_true = prediction_true.astype(np.uint8) + + else: + img_h_page = img.shape[0] + img_w_page = img.shape[1] + img = img / float(255.0) + img = resize_image(img, model_height, model_width) + + label_p_pred = model.predict(img.reshape(1, img.shape[0], img.shape[1], img.shape[2])) + + seg = np.argmax(label_p_pred, axis=3)[0] + seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) + prediction_true = resize_image(seg_color, img_h_page, img_w_page) + prediction_true = prediction_true.astype(np.uint8) + return prediction_true[:,:,0] + + def run(self, image=None, image_path=None, save=None, use_patches=False, dir_in=None, dir_out=None): + print(dir_in,'dir_in') + if not dir_in: + if (image is not None and image_path is not None) or \ + (image is None and image_path is None): + raise ValueError("Must pass either a opencv2 image or an image_path") + if image_path is not None: + image = cv2.imread(image_path) + img_last = 0 + for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): + self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) + + res = self.predict(model, image, use_patches) + + img_fin = np.zeros((res.shape[0], res.shape[1], 3)) + res[:, :][res[:, :] == 0] = 2 + res = res - 1 + res = res * 255 + img_fin[:, :, 0] = res + img_fin[:, :, 1] = res + img_fin[:, :, 2] = res + + img_fin = img_fin.astype(np.uint8) + img_fin = (res[:, :] == 0) * 255 + img_last = img_last + img_fin + + kernel = np.ones((5, 5), np.uint8) + img_last[:, :][img_last[:, :] > 0] = 255 + img_last = (img_last[:, :] == 0) * 255 + if save: + cv2.imwrite(save, img_last) + return img_last + else: + ls_imgs = os.listdir(dir_in) + for image_name in ls_imgs: + image_stem = image_name.split('.')[0] + print(image_name,'image_name') + image = cv2.imread(os.path.join(dir_in,image_name) ) + img_last = 0 + for n, (model, model_file) in enumerate(zip(self.models, self.model_files)): + self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files))) + + res = self.predict(model, image, use_patches) + + img_fin = np.zeros((res.shape[0], res.shape[1], 3)) + res[:, :][res[:, :] == 0] = 2 + res = res - 1 + res = res * 255 + img_fin[:, :, 0] = res + img_fin[:, :, 1] = res + img_fin[:, :, 2] = res + + img_fin = img_fin.astype(np.uint8) + img_fin = (res[:, :] == 0) * 255 + 
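# What the ensembling in run() above amounts to per page, in miniature: each
# model's 0/1 segmentation is remapped so that pixels predicted as class 1
# become 255, the maps are summed across models, and the sum is inverted, so
# a pixel ends up black whenever at least one model claimed it (a logical OR
# over the ensemble). The random arrays stand in for per-model predictions.
import numpy as np

predictions = [np.random.randint(0, 2, (10, 10)) for _ in range(3)]

img_last = 0
for res in predictions:
    img_fin = (res == 1) * 255      # net effect of the res==0 -> 2, -1, *255 remapping above
    img_last = img_last + img_fin

img_last[img_last > 0] = 255        # any model fired -> foreground
img_last = (img_last == 0) * 255    # invert: foreground black, background white
print(np.unique(img_last))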
img_last = img_last + img_fin + + kernel = np.ones((5, 5), np.uint8) + img_last[:, :][img_last[:, :] > 0] = 255 + img_last = (img_last[:, :] == 0) * 255 + + cv2.imwrite(os.path.join(dir_out,image_stem+'.png'), img_last) From 328d33e3dc294b4d93fcdca833ed679ee0169f9f Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 23 Oct 2024 16:55:41 +0200 Subject: [PATCH 45/50] =?UTF-8?q?Temporary=20commit=20=E2=80=93=20textline?= =?UTF-8?q?=20prediction=20without=20patches?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- qurator/eynollah/eynollah.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index e587ff3..6ee3dc7 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2120,7 +2120,7 @@ class Eynollah: else: thresholding_for_artificial_class_in_light_version = False if not self.dir_in: - model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir if patches else self.model_textline_dir_np) + model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) #img = img.astype(np.uint8) img_org = np.copy(img) img_h = img_org.shape[0] @@ -3311,7 +3311,8 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) + patches = False + textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) From 82281bd6cfa218e7e434fe8da535fae394d5f59c Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Fri, 25 Oct 2024 19:42:48 +0200 Subject: [PATCH 46/50] fixing a bug occurring with reading order + Slro option with no-patch textline model and thresholding for the artificial class --- qurator/eynollah/eynollah.py | 79 +++++++++++++++++------------- qurator/eynollah/utils/__init__.py | 21 ++++---- qurator/eynollah/utils/xml.py | 2 +- 3 files changed, 58 insertions(+), 44 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index e587ff3..03252fb 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -245,7 +245,7 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,11 +253,11 @@ class Eynollah: self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + 
"/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -816,6 +816,14 @@ class Eynollah: verbose=0) seg = np.argmax(label_p_pred, axis=3)[0] + + if thresholding_for_artificial_class_in_light_version: + seg_art = label_p_pred[0,:,:,2] + + seg_art[seg_art<0.2] = 0 + seg_art[seg_art>0] =1 + + seg[seg_art==1]=2 seg_color = np.repeat(seg[:, :, np.newaxis], 3, axis=2) prediction_true = resize_image(seg_color, img_h_page, img_w_page) prediction_true = prediction_true.astype(np.uint8) @@ -1546,7 +1554,7 @@ class Eynollah: pass else: img = otsu_copy_binary(img) - img = img.astype(np.uint8) + #img = img.astype(np.uint8) prediction_regions2 = None else: if cols == 1: @@ -1605,9 +1613,12 @@ class Eynollah: img = img.astype(np.uint8) marginal_of_patch_percent = 0.1 - + prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) + + ##prediction_regions = self.do_prediction(False, img, model_region, marginal_of_patch_percent=marginal_of_patch_percent, n_batch_inference=3) + prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions, prediction_regions @@ -2148,7 +2159,7 @@ class Eynollah: if not thresholding_for_artificial_class_in_light_version: textline_mask_tot_ea_art = textline_mask_tot_ea_art.astype('uint8') - textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) + #textline_mask_tot_ea_art = cv2.dilate(textline_mask_tot_ea_art, KERNEL, iterations=1) prediction_textline[:,:][textline_mask_tot_ea_art[:,:]==1]=2 @@ -2245,26 +2256,27 @@ class Eynollah: #if (not self.input_binary) or self.full_layout: #if self.input_binary: #img_bin = np.copy(img_resized) - if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): - if not self.dir_in: - 
model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) - prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) - else: - prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) - - #print("inside bin ", time.time()-t_bin) - prediction_bin=prediction_bin[:,:,0] - prediction_bin = (prediction_bin[:,:]==0)*1 - prediction_bin = prediction_bin*255 - - prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) - - prediction_bin = prediction_bin.astype(np.uint16) - #img= np.copy(prediction_bin) - img_bin = np.copy(prediction_bin) - else: - img_bin = np.copy(img_resized) - + ###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30): + ###if not self.dir_in: + ###model_bin, session_bin = self.start_new_session_and_model(self.model_dir_of_binarization) + ###prediction_bin = self.do_prediction(True, img_resized, model_bin, n_batch_inference=5) + ###else: + ###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5) + + ####print("inside bin ", time.time()-t_bin) + ###prediction_bin=prediction_bin[:,:,0] + ###prediction_bin = (prediction_bin[:,:]==0)*1 + ###prediction_bin = prediction_bin*255 + + ###prediction_bin =np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2) + + ###prediction_bin = prediction_bin.astype(np.uint16) + ####img= np.copy(prediction_bin) + ###img_bin = np.copy(prediction_bin) + ###else: + ###img_bin = np.copy(img_resized) + + img_bin = np.copy(img_resized) #print("inside 1 ", time.time()-t_in) ###textline_mask_tot_ea = self.run_textline(img_bin) @@ -3311,7 +3323,8 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - textline_mask_tot_ea, _ = self.textline_contours(image_page, True, scaler_h_textline, scaler_w_textline, num_col_classifier) + patches = False + textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3564,9 +3577,9 @@ class Eynollah: image_page = image_page.astype(np.uint8) #print("full inside 1", time.time()- t_full0) if self.light_version: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, True, cols=num_col_classifier) + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(img_bin_light, False, cols=num_col_classifier) else: - regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, True, cols=num_col_classifier) + regions_fully, regions_fully_only_drop = self.extract_text_regions_new(image_page, False, cols=num_col_classifier) #print("full inside 2", time.time()- t_full0) # 6 is the separators lable in old full layout model # 4 is the drop capital class in old full layout model @@ -3590,7 +3603,7 @@ class Eynollah: regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) + ##regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -4768,9 +4781,9 @@ class Eynollah: 
textline_mask_tot_ea_deskew = resize_image(textline_mask_tot_ea,img_h_new, img_w_new ) - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea_deskew) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea_deskew) else: - slope_deskew, slope_first = 0, 0#self.run_deskew(textline_mask_tot_ea) + slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea) #print("text region early -2,5 in %.1fs", time.time() - t0) #self.logger.info("Textregion detection took %.1fs ", time.time() - t1t) num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light = \ diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index 6219df2..e7cbbea 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -1204,17 +1204,12 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): top = peaks_neg_new[i] down = peaks_neg_new[i + 1] - # print(top,down,'topdown') - indexes_in = matrix_of_orders[:, 0][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] cxs_in = matrix_of_orders[:, 2][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] cys_in = matrix_of_orders[:, 3][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] types_of_text = matrix_of_orders[:, 1][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] index_types_of_text = matrix_of_orders[:, 4][(matrix_of_orders[:, 3] >= top) & ((matrix_of_orders[:, 3] < down))] - # print(top,down) - # print(cys_in,'cyyyins') - # print(indexes_in,'indexes') sorted_inside = np.argsort(cxs_in) ind_in_int = indexes_in[sorted_inside] @@ -1228,11 +1223,17 @@ def order_of_regions(textline_mask, contours_main, contours_header, y_ref): ##matrix_of_orders[:len_main,4]=final_indexers_sorted[:] - # print(peaks_neg_new,'peaks') - # print(final_indexers_sorted,'indexsorted') - # print(final_types,'types') - # print(final_index_type,'final_index_type') - + # This fix is applied when the sum of the lengths of contours and contours_h does not match final_indexers_sorted; the missed indexes are then appended below. However, this is not an optimal solution. 
+ if (len(cy_main)+len(cy_header) ) == len(final_index_type): + pass + else: + indexes_missed = set(list( np.array( range((len(cy_main)+len(cy_header) ) )) )) - set(final_indexers_sorted) + for ind_missed in indexes_missed: + final_indexers_sorted.append(ind_missed) + final_types.append(1) + final_index_type.append(ind_missed) + + return final_indexers_sorted, matrix_of_orders, final_types, final_index_type def combine_hor_lines_and_delete_cross_points_and_get_lines_features_back_new(img_p_in_ver, img_in_hor,num_col_classifier): diff --git a/qurator/eynollah/utils/xml.py b/qurator/eynollah/utils/xml.py index 0386b25..bd95702 100644 --- a/qurator/eynollah/utils/xml.py +++ b/qurator/eynollah/utils/xml.py @@ -72,7 +72,7 @@ def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region index_of_types_2 = index_of_types[kind_of_texts == 2] indexes_sorted_2 = indexes_sorted[kind_of_texts == 2] - + counter = EynollahIdCounter(region_idx=ref_point) for idx_textregion, _ in enumerate(found_polygons_text_region): id_of_texts.append(counter.next_region_id) From 90ee2d61dc1d2ce05724d6d0f11c200ba1709108 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Mon, 28 Oct 2024 20:56:06 +0100 Subject: [PATCH 47/50] textline segmentation is masked with drop capitals --- qurator/eynollah/eynollah.py | 223 +++++++++++++++++++++-------------- 1 file changed, 135 insertions(+), 88 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 1cb00c7..d0a8299 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -245,7 +245,7 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,11 +253,11 @@ class Eynollah: self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: - 
self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: - self.model_textline_dir = dir_models + "/model_textline_ens_5_6_7_8_10_11_nopatch"#"/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" + self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/eynollah-textline_20210425" if self.ocr: self.model_ocr_dir = dir_models + "/checkpoint-166692_printed_trocr" @@ -502,7 +502,8 @@ class Eynollah: if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) num_column_is_classified = False - elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: img_new = np.copy(img) num_column_is_classified = False else: @@ -523,7 +524,8 @@ class Eynollah: if label_p_pred[0][int(num_col - 1)] < 0.9 and img_w_new < width_early: img_new = np.copy(img) num_column_is_classified = False - elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + #elif label_p_pred[0][int(num_col - 1)] < 0.8 and img_h_new >= 8000: + elif img_h_new >= 8000: img_new = np.copy(img) num_column_is_classified = False else: @@ -3323,7 +3325,7 @@ class Eynollah: scaler_h_textline = 1#1.3 # 1.2#1.2 scaler_w_textline = 1#1.3 # 0.9#1 #print(image_page.shape) - patches = False + patches = True textline_mask_tot_ea, _ = self.textline_contours(image_page, patches, scaler_h_textline, scaler_w_textline, num_col_classifier) if self.textline_light: textline_mask_tot_ea = textline_mask_tot_ea.astype(np.int16) @@ -3634,6 +3636,7 @@ class Eynollah: regions_without_separators = (text_regions_p[:, :] == 1) * 1 img_revised_tab = np.copy(text_regions_p[:, :]) polygons_of_images = return_contours_of_interested_region(img_revised_tab, 5) + self.logger.debug('exit run_boxes_full_layout') #print("full inside 3", time.time()- t_full0) return polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables @@ -4169,7 +4172,123 @@ class Eynollah: x_differential_new[split_masked[i]:split_masked[i+1]] = -1*np.array(x_differential)[split_masked[i]:split_masked[i+1]] return x_differential_new - + def dilate_textregions_contours_textline_version(self,all_found_textline_polygons): + #print(all_found_textline_polygons) + + for j in range(len(all_found_textline_polygons)): + for ij in range(len(all_found_textline_polygons[j])): + + con_ind = all_found_textline_polygons[j][ij] + area = cv2.contourArea(con_ind) + con_ind = con_ind.astype(np.float) + + x_differential = np.diff( con_ind[:,0,0]) + y_differential = np.diff( con_ind[:,0,1]) + + + x_differential = gaussian_filter1d(x_differential, 0.1) + y_differential = gaussian_filter1d(y_differential, 0.1) + + x_min = float(np.min( con_ind[:,0,0] )) + y_min = float(np.min( con_ind[:,0,1] )) + + x_max = float(np.max( 
con_ind[:,0,0] )) + y_max = float(np.max( con_ind[:,0,1] )) + + x_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in x_differential] + y_differential_mask_nonzeros = [ ind/abs(ind) if ind!=0 else ind for ind in y_differential] + + abs_diff=abs(abs(x_differential)- abs(y_differential) ) + + inc_x = np.zeros(len(x_differential)+1) + inc_y = np.zeros(len(x_differential)+1) + + + if (y_max-y_min) <= (x_max-x_min): + dilation_m1 = round(area / (x_max-x_min) * 0.12) + else: + dilation_m1 = round(area / (y_max-y_min) * 0.12) + + if dilation_m1>8: + dilation_m1 = 8 + if dilation_m1<6: + dilation_m1 = 6 + #print(dilation_m1, 'dilation_m1') + dilation_m1 = 6 + dilation_m2 = int(dilation_m1/2.) +1 + + for i in range(len(x_differential)): + if abs_diff[i]==0: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + + elif abs_diff[i]!=0 and abs_diff[i]>=3: + if abs(x_differential[i])>abs(y_differential[i]): + inc_y[i+1] = dilation_m1*(x_differential_mask_nonzeros[i]) + else: + inc_x[i+1]= dilation_m1*(-1*y_differential_mask_nonzeros[i]) + else: + inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) + inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) + + + inc_x[0] = inc_x[-1] + inc_y[0] = inc_y[-1] + + con_scaled = con_ind*1 + + con_scaled[:,0, 0] = con_ind[:,0,0] + np.array(inc_x)[:] + con_scaled[:,0, 1] = con_ind[:,0,1] + np.array(inc_y)[:] + + con_scaled[:,0, 1][con_scaled[:,0, 1]<0] = 0 + con_scaled[:,0, 0][con_scaled[:,0, 0]<0] = 0 + + area_scaled = cv2.contourArea(con_scaled.astype(np.int32)) + + con_ind = con_ind.astype(np.int32) + + results = [cv2.pointPolygonTest(con_ind, (con_scaled[ind,0, 0], con_scaled[ind,0, 1]), False) for ind in range(len(con_scaled[:,0, 1])) ] + + results = np.array(results) + + #print(results,'results') + + results[results==0] = 1 + + + diff_result = np.diff(results) + + indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] + indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] + + + if results[0]==1: + con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] + con_scaled[:indices_m2[0]+1,0, 0] = con_ind[:indices_m2[0]+1,0,0] + #indices_2 = indices_2[1:] + indices_m2 = indices_m2[1:] + + + + if len(indices_2)>len(indices_m2): + con_scaled[indices_2[-1]+1:,0, 1] = con_ind[indices_2[-1]+1:,0,1] + con_scaled[indices_2[-1]+1:,0, 0] = con_ind[indices_2[-1]+1:,0,0] + + indices_2 = indices_2[:-1] + + + for ii in range(len(indices_2)): + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] + con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] + + + all_found_textline_polygons[j][ij][:,0,1] = con_scaled[:,0, 1] + all_found_textline_polygons[j][ij][:,0,0] = con_scaled[:,0, 0] + return all_found_textline_polygons def dilate_textregions_contours(self,all_found_textline_polygons): #print(all_found_textline_polygons) for j in range(len(all_found_textline_polygons)): @@ -4179,9 +4298,6 @@ class Eynollah: area = cv2.contourArea(con_ind) con_ind = con_ind.astype(np.float) - #con_ind[:,0,0] = gaussian_filter1d(con_ind[:,0,0], 0.5) - 
#con_ind[:,0,1] = gaussian_filter1d(con_ind[:,0,1], 0.5) - x_differential = np.diff( con_ind[:,0,0]) y_differential = np.diff( con_ind[:,0,1]) @@ -4235,29 +4351,6 @@ class Eynollah: inc_x[i+1] = dilation_m2*(-1*y_differential_mask_nonzeros[i]) inc_y[i+1] = dilation_m2*(x_differential_mask_nonzeros[i]) - ###for i in range(len(x_differential)): - ###if abs_diff[i]==0: - ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) - ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]==0 and y_differential_mask_nonzeros[i]!=0: - ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) - ###elif abs_diff[i]!=0 and x_differential_mask_nonzeros[i]!=0 and y_differential_mask_nonzeros[i]==0: - ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) - - ###elif abs_diff[i]!=0 and abs_diff[i]>=3: - ###if abs(x_differential[i])>abs(y_differential[i]): - ###inc_y[i+1] = 12*(x_differential_mask_nonzeros[i]) - ###else: - ###inc_x[i+1]= 12*(-1*y_differential_mask_nonzeros[i]) - ###else: - ###inc_x[i+1] = 7*(-1*y_differential_mask_nonzeros[i]) - ###inc_y[i+1] = 7*(x_differential_mask_nonzeros[i]) - - ###inc_x =list(inc_x) - ###inc_x.append(inc_x[0]) - - ###inc_y =list(inc_y) - ###inc_y.append(inc_y[0]) inc_x[0] = inc_x[-1] inc_y[0] = inc_y[-1] @@ -4288,21 +4381,6 @@ class Eynollah: indices_2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==2] indices_m2 = [ind for ind in range(len(diff_result)) if diff_result[ind]==-2] - #print(area_scaled / area, "ratio") - #print(results,'results') - #if results[0]==1 and diff_result[-1]==-2: - ##indices_2 = indices_2[1:] - ##indices_m2 = indices_m2[1:] - - #con_scaled[:indices_m2[0]+1,0, 1] = con_scaled[indices_m2[-1],0, 1] - #con_scaled[:indices_m2[0]+1,0, 0] = con_scaled[indices_m2[-1],0, 0] - - - #con_scaled[indices_2[-1]+1:,0, 1] = con_scaled[indices_m2[-1],0, 1] - #con_scaled[indices_2[-1]+1:,0, 0] = con_scaled[indices_m2[-1],0, 0] - - #indices_2 = indices_2[:-1] - #indices_m2 = indices_m2[1:-1] if results[0]==1: con_scaled[:indices_m2[0]+1,0, 1] = con_ind[:indices_m2[0]+1,0,1] @@ -4318,50 +4396,12 @@ class Eynollah: indices_2 = indices_2[:-1] - - - #diff_neg_pos = np.array(indices_m2) - np.array(indices_2) - - - #print(diff_neg_pos,'diff') - ##print(indices_2, 'indices_2') - #indices_2 = np.array(indices_2)[diff_neg_pos>1] - #indices_m2 = np.array(indices_m2)[diff_neg_pos>1] for ii in range(len(indices_2)): - #x_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 0] - #y_inner = con_ind[indices_2[ii]+1:indices_m2[ii]+1,0, 1] - - #if x_inner[-1]>=x_inner[0]: - #x_interest = np.min(x_inner) - #else: - #x_interest = np.max(x_inner) - - #if y_inner[-1]>=y_inner[0]: - #y_interest = np.min(y_inner) - #else: - #y_interest = np.max(y_inner) - con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 1] = con_scaled[indices_2[ii],0, 1] con_scaled[indices_2[ii]+1:indices_m2[ii]+1,0, 0] = con_scaled[indices_2[ii],0, 0] - - #con_scaled[:,0, 1][results[:]>0] = con_ind[:,0,1][results[:]>0] - #con_scaled[:,0, 0][results[:]>0] = con_ind[:,0,0][results[:]>0] - - #print(list(results), 'results') - #print(list(diff_result), 'diff_result') - #print(indices_2,'2') - #print(indices_m2,'-2') - #print(diff_neg_pos,'diff_neg_pos') - - ##con_scaled[:,0, 1] = gaussian_filter1d(con_scaled[:,0, 1], 0.1) - ##con_scaled[:,0, 0] = gaussian_filter1d(con_scaled[:,0, 0], 0.1) - - #con_scaled[-1,0, 1] = con_scaled[0,0, 1] - #con_scaled[-1,0, 0] = con_scaled[0,0, 0] - ##print(len(con_scaled[:,0,0]),'con_scaled[:,0,0]') 
all_found_textline_polygons[j][:,0,1] = con_scaled[:,0, 1] all_found_textline_polygons[j][:,0,0] = con_scaled[:,0, 0] return all_found_textline_polygons @@ -4865,6 +4905,12 @@ class Eynollah: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) + + if self.light_version: + drop_label_in_full_layout = 4 + textline_mask_tot_ea_org[img_revised_tab==drop_label_in_full_layout] = 0 + + text_only = ((img_revised_tab[:, :] == 1)) * 1 if np.abs(slope_deskew) >= SLOPE_THRESHOLD: text_only_d = ((text_regions_p_1_n[:, :] == 1)) * 1 @@ -5018,7 +5064,8 @@ class Eynollah: #slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, _ = self.delete_regions_without_textlines(slopes_marginals, all_found_textline_polygons_marginals, boxes_marginals, polygons_of_marginals, polygons_of_marginals, np.array(range(len(polygons_of_marginals)))) #all_found_textline_polygons = self.dilate_textlines(all_found_textline_polygons) - all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) From 438df5228705e93f52d43a17a9284cc199fb97f4 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Oct 2024 00:52:09 +0100 Subject: [PATCH 48/50] updating --- qurator/eynollah/eynollah.py | 8 +++++--- qurator/eynollah/utils/__init__.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index d0a8299..543ed92 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -1726,6 +1726,7 @@ class Eynollah: polygons_of_textlines = return_contours_of_interested_region(textline_mask_tot,1,0.00001) + M_main_tot = [cv2.moments(polygons_of_textlines[j]) for j in range(len(polygons_of_textlines))] cx_main_tot = [(M_main_tot[j]["m10"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] cy_main_tot = [(M_main_tot[j]["m01"] / (M_main_tot[j]["m00"] + 1e-32)) for j in range(len(M_main_tot))] @@ -3605,7 +3606,7 @@ class Eynollah: regions_fully[:,:,0][drops[:,:]==1] = drop_capital_label_in_full_layout_model - ##regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) + regions_fully = putt_bb_of_drop_capitals_of_model_in_patches_in_layout(regions_fully, drop_capital_label_in_full_layout_model) ##regions_fully_np, _ = self.extract_text_regions(image_page, False, cols=num_col_classifier) ##if num_col_classifier > 2: ##regions_fully_np[:, :, 0][regions_fully_np[:, :, 0] == 4] = 0 @@ -4901,6 +4902,7 @@ class Eynollah: polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, 
regions_without_separators_d, boxes, boxes_d, polygons_of_marginals, contours_tables = self.run_boxes_no_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, table_prediction, erosion_hurts) ###polygons_of_marginals = self.dilate_textregions_contours(polygons_of_marginals) if self.full_layout: + cv2.imwrite('dewar_page.png', image_page) if not self.light_version: img_bin_light = None polygons_of_images, img_revised_tab, text_regions_p_1_n, textline_mask_tot_d, regions_without_separators_d, regions_fully, regions_without_separators, polygons_of_marginals, contours_tables = self.run_boxes_full_layout(image_page, textline_mask_tot, text_regions_p, slope_deskew, num_col_classifier, img_only_regions, table_prediction, erosion_hurts, img_bin_light) @@ -5067,7 +5069,7 @@ class Eynollah: #####all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea_org, type_contour="textline") - all_found_textline_polygons_marginals = self.dilate_textline_contours(all_found_textline_polygons_marginals) + all_found_textline_polygons_marginals = self.dilate_textregions_contours_textline_version(all_found_textline_polygons_marginals) else: textline_mask_tot_ea = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1) @@ -5261,7 +5263,7 @@ class Eynollah: all_found_textline_polygons=[ all_found_textline_polygons ] - all_found_textline_polygons = self.dilate_textline_contours(all_found_textline_polygons) + all_found_textline_polygons = self.dilate_textregions_contours_textline_version(all_found_textline_polygons) all_found_textline_polygons = self.filter_contours_inside_a_bigger_one(all_found_textline_polygons, textline_mask_tot_ea, type_contour="textline") diff --git a/qurator/eynollah/utils/__init__.py b/qurator/eynollah/utils/__init__.py index e7cbbea..29f80b4 100644 --- a/qurator/eynollah/utils/__init__.py +++ b/qurator/eynollah/utils/__init__.py @@ -792,7 +792,7 @@ def putt_bb_of_drop_capitals_of_model_in_patches_in_layout(layout_in_patch, drop for jj in range(len(contours_drop_parent)): x, y, w, h = cv2.boundingRect(contours_drop_parent[jj]) - if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.8: + if ( ( areas_cnt_text[jj] * float(drop_only.shape[0] * drop_only.shape[1]) ) / float(w*h) ) > 0.4: layout_in_patch[y : y + h, x : x + w, 0] = drop_capital_label else: From e796a99c5cae651ae1601f2033feecd695b382f2 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Wed, 30 Oct 2024 15:02:50 +0100 Subject: [PATCH 49/50] updating inference for early layout for documents with more than two columns --- qurator/eynollah/eynollah.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 543ed92..0a1c2b1 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -2296,9 +2296,8 @@ class Eynollah: #plt.show() if not skip_layout_and_reading_order: #print("inside 2 ", time.time()-t_in) - if not self.dir_in: - if num_col_classifier == 1 or num_col_classifier >= 2: + if num_col_classifier == 1 or num_col_classifier == 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if 
self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) @@ -2307,12 +2306,12 @@ class Eynollah: prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region) + model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: - if num_col_classifier == 1 or num_col_classifier >= 2: + if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) else: @@ -2320,7 +2319,7 @@ class Eynollah: prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region, n_batch_inference=3) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) From 751b0102f7787f2ab8a45e3ecc4604e7c107e1e6 Mon Sep 17 00:00:00 2001 From: vahidrezanezhad Date: Tue, 5 Nov 2024 19:50:18 +0100 Subject: [PATCH 50/50] updating early layout inference for light version --- qurator/eynollah/eynollah.py | 37 ++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/qurator/eynollah/eynollah.py b/qurator/eynollah/eynollah.py index 0a1c2b1..9095c15 100644 --- a/qurator/eynollah/eynollah.py +++ b/qurator/eynollah/eynollah.py @@ -245,7 +245,7 @@ class Eynollah: self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425" self.model_region_dir_p = dir_models + "/eynollah-main-regions-aug-scaling_20210425" self.model_region_dir_p2 = dir_models + "/eynollah-main-regions-aug-rotation_20210425" - self.model_region_dir_fully_np = dir_models + 
"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" + self.model_region_dir_fully_np = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/eynollah-full-regions-1column_20210425" #self.model_region_dir_fully = dir_models + "/eynollah-full-regions-3+column_20210425" self.model_page_dir = dir_models + "/eynollah-page-extraction_20210425" self.model_region_dir_p_ens = dir_models + "/eynollah-main-regions-ensembled_20210425" @@ -253,7 +253,7 @@ class Eynollah: self.model_reading_order_machine_dir = dir_models + "/model_ens_reading_order_machine_based" self.model_region_dir_p_1_2_sp_np = dir_models + "/modelens_e_l_all_sp_0_1_2_3_4_171024"#"/modelens_12sp_elay_0_3_4__3_6_n"#"/modelens_earlylayout_12spaltige_2_3_5_6_7_8"#"/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18"#"/modelens_1_2_4_5_early_lay_1_2_spaltige"#"/model_3_eraly_layout_no_patches_1_2_spaltige" ##self.model_region_dir_fully_new = dir_models + "/model_2_full_layout_new_trans" - self.model_region_dir_fully = dir_models + "/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" + self.model_region_dir_fully = dir_models + "/modelens_full_lay_1_3_031124"#"/modelens_full_lay_13__3_19_241024"#"/model_full_lay_13_241024"#"/modelens_full_lay_13_17_231024"#"/modelens_full_lay_1_2_221024"#"/modelens_full_layout_24_till_28"#"/model_2_full_layout_new_trans" if self.textline_light: self.model_textline_dir = dir_models + "/modelens_textline_0_1__2_4_16092024"#"/modelens_textline_1_4_16092024"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_1_3_4_20240915"#"/model_textline_ens_3_4_5_6_artificial"#"/modelens_textline_9_12_13_14_15"#"/eynollah-textline_light_20210425"# else: @@ -743,7 +743,7 @@ class Eynollah: def get_image_and_scales_after_enhancing(self, img_org, img_res): self.logger.debug("enter get_image_and_scales_after_enhancing") self.image = np.copy(img_res) - #self.image = self.image.astype(np.uint8) + self.image = self.image.astype(np.uint8) self.image_org = np.copy(img_org) self.height_org = self.image_org.shape[0] self.width_org = self.image_org.shape[1] @@ -1298,20 +1298,25 @@ class Eynollah: seg = np.argmax(label_p_pred, axis=3) if thresholding_for_some_classes_in_light_version: - seg_not_base = label_p_pred[:,:,:,4] - seg_not_base[seg_not_base>0.03] =1 - seg_not_base[seg_not_base<1] =0 + + seg_art = label_p_pred[:,:,:,4] + seg_art[seg_art<0.2] =0 + seg_art[seg_art>0] =1 + ###seg[seg_art==1]=4 + ##seg_not_base = label_p_pred[:,:,:,4] + ##seg_not_base[seg_not_base>0.03] =1 + ##seg_not_base[seg_not_base<1] =0 seg_line = label_p_pred[:,:,:,3] seg_line[seg_line>0.1] =1 seg_line[seg_line<1] =0 - seg_background = label_p_pred[:,:,:,0] - seg_background[seg_background>0.25] =1 - seg_background[seg_background<1] =0 + ##seg_background = label_p_pred[:,:,:,0] + ##seg_background[seg_background>0.25] =1 + ##seg_background[seg_background<1] =0 - seg[seg_not_base==1]=4 - seg[seg_background==1]=0 + seg[seg_art==1]=4 + ##seg[seg_background==1]=0 seg[(seg_line==1) & (seg==0)]=3 if thresholding_for_artificial_class_in_light_version: seg_art = label_p_pred[:,:,:,2] @@ -2300,26 +2305,26 @@ class Eynollah: if num_col_classifier == 1 or num_col_classifier 
== 2: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, model_region, n_batch_inference=1, thresholding_for_some_classes_in_light_version = True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, model_region, n_batch_inference=1, thresholding_for_artificial_class_in_light_version = True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np) - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), model_region, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ##model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light) ##prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) else: if num_col_classifier == 1 or num_col_classifier == 2: if self.image_org.shape[0]/self.image_org.shape[1] > 2.5: - prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) + prediction_regions_org = self.do_prediction_new_concept(True, img_resized, self.model_region_1_2, n_batch_inference=1, thresholding_for_some_classes_in_light_version=True) else: prediction_regions_org = np.zeros((self.image_org.shape[0], self.image_org.shape[1], 3)) prediction_regions_page = self.do_prediction_new_concept(False, self.image_page_org_size, self.model_region_1_2, n_batch_inference=1, thresholding_for_artificial_class_in_light_version=True) prediction_regions_org[self.page_coord[0] : self.page_coord[1], self.page_coord[2] : self.page_coord[3],:] = prediction_regions_page else: - prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2) + prediction_regions_org = self.do_prediction_new_concept(True, resize_image(img_bin, int( (900+ (num_col_classifier-3)*100) *(img_bin.shape[0]/img_bin.shape[1]) ), 900+ (num_col_classifier-3)*100), self.model_region_1_2, n_batch_inference=2, thresholding_for_some_classes_in_light_version=True) ###prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True) #print("inside 3 ", time.time()-t_in) @@ -4595,7 +4600,7 @@ class Eynollah: areas_without = np.array(areas_tot)[args_all] area_of_con_interest = areas_tot[ij] - args_with_bigger_area = 
np.array(args_all)[areas_without > area_of_con_interest] + args_with_bigger_area = np.array(args_all)[areas_without > 1.5*area_of_con_interest] if len(args_with_bigger_area)>0: results = [cv2.pointPolygonTest(contours_txtline_of_all_textregions[ind], (cx_main_tot[ij], cy_main_tot[ij]), False) for ind in args_with_bigger_area ]
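
The final hunk above makes filter_contours_inside_a_bigger_one stricter about what counts as a nested textline: a candidate containing contour must now be at least 1.5 times larger in area before the centroid test with cv2.pointPolygonTest is applied. What follows is a minimal, self-contained sketch of that containment test; is_inside_bigger_contour is a hypothetical helper name, and the real method additionally distinguishes contour types and rewrites the polygon lists in place rather than returning a flag.

import cv2
import numpy as np

def is_inside_bigger_contour(contour, all_contours, area_ratio=1.5):
    # A contour counts as nested only when its centroid lies inside
    # another contour whose area exceeds area_ratio times its own.
    M = cv2.moments(contour)
    cx = M["m10"] / (M["m00"] + 1e-32)
    cy = M["m01"] / (M["m00"] + 1e-32)
    area = cv2.contourArea(contour)
    for other in all_contours:
        if other is contour:
            continue
        if cv2.contourArea(other) > area_ratio * area:
            # pointPolygonTest returns +1 inside, 0 on the edge, -1 outside
            if cv2.pointPolygonTest(other, (float(cx), float(cy)), False) >= 0:
                return True
    return False

# Toy example: a 5x5 square nested inside a clearly larger 20x20 square.
square = lambda x0, y0, s: np.array(
    [[[x0, y0]], [[x0 + s, y0]], [[x0 + s, y0 + s]], [[x0, y0 + s]]], dtype=np.int32)
inner, outer = square(10, 10, 5), square(8, 8, 20)
print(is_inside_bigger_contour(inner, [inner, outer]))  # True

Raising the area threshold from 1x to 1.5x makes the filter more conservative: a textline is only treated as nested when the surrounding contour is clearly larger, instead of being dropped because a similarly sized neighbour happens to contain its centroid.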