adding option for textline detection in printspace

pull/138/head^2
vahidrezanezhad 4 months ago
parent c3a4a1bba7
commit f0b49073b7

@@ -741,7 +741,7 @@ class Eynollah:
         return model, None
-    def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1):
+    def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False):
         self.logger.debug("enter do_prediction")
         img_height_model = model.layers[len(model.layers) - 1].output_shape[1]
@@ -774,7 +774,7 @@ class Eynollah:
         width_mid = img_width_model - 2 * margin
         height_mid = img_height_model - 2 * margin
         img = img / float(255.0)
-        img = img.astype(np.float16)
+        #img = img.astype(np.float16)
         img_h = img.shape[0]
         img_w = img.shape[1]
         prediction_true = np.zeros((img_h, img_w, 3))
@@ -832,6 +832,23 @@ class Eynollah:
                 seg = np.argmax(label_p_pred, axis=3)
+                if thresholding_for_some_classes_in_light_version:
+                    seg_not_base = label_p_pred[:,:,:,4]
+                    seg_not_base[seg_not_base>0.03] =1
+                    seg_not_base[seg_not_base<1] =0
+                    seg_line = label_p_pred[:,:,:,3]
+                    seg_line[seg_line>0.1] =1
+                    seg_line[seg_line<1] =0
+                    seg_background = label_p_pred[:,:,:,0]
+                    seg_background[seg_background>0.25] =1
+                    seg_background[seg_background<1] =0
+                    seg[seg_not_base==1]=4
+                    seg[seg_background==1]=0
+                    seg[(seg_line==1) & (seg==0)]=3
                 indexer_inside_batch = 0
                 for i_batch, j_batch in zip(list_i_s, list_j_s):
                     seg_in = seg[indexer_inside_batch,:,:]
@@ -889,6 +906,22 @@ class Eynollah:
                 label_p_pred = model.predict(img_patch,verbose=0)
                 seg = np.argmax(label_p_pred, axis=3)
+                if thresholding_for_some_classes_in_light_version:
+                    seg_not_base = label_p_pred[:,:,:,4]
+                    seg_not_base[seg_not_base>0.03] =1
+                    seg_not_base[seg_not_base<1] =0
+                    seg_line = label_p_pred[:,:,:,3]
+                    seg_line[seg_line>0.1] =1
+                    seg_line[seg_line<1] =0
+                    seg_background = label_p_pred[:,:,:,0]
+                    seg_background[seg_background>0.25] =1
+                    seg_background[seg_background<1] =0
+                    seg[seg_not_base==1]=4
+                    seg[seg_background==1]=0
+                    seg[(seg_line==1) & (seg==0)]=3
                 indexer_inside_batch = 0
                 for i_batch, j_batch in zip(list_i_s, list_j_s):
@@ -1202,9 +1235,9 @@ class Eynollah:
         img_height_h = img.shape[0]
         img_width_h = img.shape[1]
         if not self.dir_in:
-            model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np)
+            model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np)
         else:
-            model_region = self.model_region_fl_new if patches else self.model_region_fl_np
+            model_region = self.model_region_fl if patches else self.model_region_fl_np
         if not patches:
             if self.light_version:
@@ -1809,7 +1842,7 @@ class Eynollah:
         q.put(slopes_sub)
         poly.put(poly_sub)
         box_sub.put(boxes_sub_new)
-    def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
+    def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False):
         self.logger.debug("enter get_regions_light_v")
         t_in = time.time()
         erosion_hurts = False
@@ -1866,28 +1899,34 @@ class Eynollah:
         textline_mask_tot_ea = self.run_textline(img_bin)
+        textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h )
+        if not skip_layout_ro:
             #print("inside 2 ", time.time()-t_in)
             #print(img_resized.shape, num_col_classifier, "num_col_classifier")
             if not self.dir_in:
-                if num_col_classifier == 1 or num_col_classifier == 2:
-                    model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
-                    prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region)
-                else:
+                ###if num_col_classifier == 1 or num_col_classifier == 2:
+                    ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
+                    ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region)
+                ###else:
+                    ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
+                    ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region)
                 model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
-                prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region)
+                prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
             else:
-                if num_col_classifier == 1 or num_col_classifier == 2:
-                    prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2)
-                else:
-                    prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region)
+                ##if num_col_classifier == 1 or num_col_classifier == 2:
+                    ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2)
+                ##else:
+                    ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region)
+                prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
             #print("inside 3 ", time.time()-t_in)
             #plt.imshow(prediction_regions_org[:,:,0])
             #plt.show()
             prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
-        textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h )
             img_bin = resize_image(img_bin,img_height_h, img_width_h )
             prediction_regions_org=prediction_regions_org[:,:,0]
@@ -1949,6 +1988,9 @@ class Eynollah:
             text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
             #print("inside 4 ", time.time()-t_in)
             return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin
+        else:
+            img_bin = resize_image(img_bin,img_height_h, img_width_h )
+            return None, erosion_hurts, None, textline_mask_tot_ea, img_bin
     def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier):
         self.logger.debug("enter get_regions_from_xy_2models")
@@ -2392,8 +2434,6 @@ class Eynollah:
             ref_point += len(id_of_texts)
         order_of_texts_tot = []
-        print(len(contours_only_text_parent),'contours_only_text_parent')
-        print(len(order_by_con_main),'order_by_con_main')
         for tj1 in range(len(contours_only_text_parent)):
             order_of_texts_tot.append(int(order_by_con_main[tj1]))
@@ -2768,6 +2808,28 @@ class Eynollah:
             num_col = None
         #print("inside graphics 3 ", time.time() - t_in_gr)
         return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light
+    def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light):
+        #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics')
+        #print(erosion_hurts, 'erosion_hurts')
+        t_in_gr = time.time()
+        img_g = self.imread(grayscale=True, uint8=True)
+        img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
+        img_g3 = img_g3.astype(np.uint8)
+        img_g3[:, :, 0] = img_g[:, :]
+        img_g3[:, :, 1] = img_g[:, :]
+        img_g3[:, :, 2] = img_g[:, :]
+        image_page, page_coord, cont_page = self.extract_page()
+        #print("inside graphics 1 ", time.time() - t_in_gr)
+        textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
+        img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
+        return page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page
     def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts):
         t_in_gr = time.time()
         img_g = self.imread(grayscale=True, uint8=True)
@@ -3633,6 +3695,8 @@ class Eynollah:
         """
         self.logger.debug("enter run")
+        skip_layout_ro = True
         t0_tot = time.time()
         if not self.dir_in:
@@ -3649,6 +3713,8 @@ class Eynollah:
             self.logger.info("Enhancing took %.1fs ", time.time() - t0)
         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
+        if not skip_layout_ro:
             if self.light_version:
                 text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
                 #print("text region early -2 in %.1fs", time.time() - t0)
@@ -3929,13 +3995,6 @@ class Eynollah:
                 else:
                     boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)
-                #print(boxes_d,'boxes_d')
-                #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1]))
-                #for box_i in boxes_d:
-                    #img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1
-                #plt.imshow(img_once)
-                #plt.show()
-                #print(np.unique(img_once),'img_once')
             if self.plotter:
                 self.plotter.write_images_into_directory(polygons_of_images, image_page)
             t_order = time.time()
@@ -4041,6 +4100,32 @@ class Eynollah:
             if not self.dir_in:
                 return pcgts
             #print("text region early 7 in %.1fs", time.time() - t0)
+        else:
+            _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro)
+            page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
+            cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
+            all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
+            all_found_textline_polygons=[ all_found_textline_polygons ]
+            order_text_new = [0]
+            slopes =[0]
+            id_of_texts_tot =['region_0001']
+            polygons_of_images = []
+            slopes_marginals = []
+            polygons_of_marginals = []
+            all_found_textline_polygons_marginals = []
+            all_box_coord_marginals = []
+            polygons_lines_xml = []
+            contours_tables = []
+            ocr_all_textlines = None
+            pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
+            if not self.dir_in:
+                return pcgts
         if self.dir_in:
             self.writer.write_pagexml(pcgts)
         #self.logger.info("Job done in %.1fs", time.time() - t0)
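
For readers skimming the diff: the new thresholding_for_some_classes_in_light_version flag in do_prediction replaces the plain argmax segmentation with per-class probability thresholds for a few channels. Below is a minimal standalone sketch of that override; the channel indices (0 = background, 3 = line, 4 = "not base") and the thresholds 0.25, 0.1 and 0.03 are taken from the hunks above, while the helper name and its comments are illustrative only and not part of the commit.

import numpy as np

def threshold_some_classes(label_p_pred, seg):
    # label_p_pred: (batch, h, w, n_classes) softmax output of the region model
    # seg:          (batch, h, w) argmax over the class axis
    seg = seg.copy()
    # binarize three channels at the class-specific thresholds used in the patch
    seg_not_base   = (label_p_pred[:, :, :, 4] > 0.03).astype(np.uint8)
    seg_line       = (label_p_pred[:, :, :, 3] > 0.1).astype(np.uint8)
    seg_background = (label_p_pred[:, :, :, 0] > 0.25).astype(np.uint8)
    # override order as in the patch: "not base" wins, then background,
    # then line pixels reclaim positions that ended up as background
    seg[seg_not_base == 1] = 4
    seg[seg_background == 1] = 0
    seg[(seg_line == 1) & (seg == 0)] = 3
    return seg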
