adding option for textline detection in printspace

2025-07-24 18:29:58 +02:00 · 2024-09-03 23:10:38 +02:00 · 2024-09-03 23:10:38 +02:00 · f0b49073b7
commit f0b49073b7
parent c3a4a1bba7
1 changed files with 522 additions and 437 deletions
--- a/qurator/eynollah/eynollah.py
+++ b/qurator/eynollah/eynollah.py
@ -741,7 +741,7 @@ class Eynollah:

        return model, None

-    def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1):
+    def do_prediction(self, patches, img, model, n_batch_inference=1, marginal_of_patch_percent=0.1, thresholding_for_some_classes_in_light_version=False):
        self.logger.debug("enter do_prediction")

        img_height_model = model.layers[len(model.layers) - 1].output_shape[1]
@ -774,7 +774,7 @@ class Eynollah:
            width_mid = img_width_model - 2 * margin
            height_mid = img_height_model - 2 * margin
            img = img / float(255.0)
-            img = img.astype(np.float16)
+            #img = img.astype(np.float16)
            img_h = img.shape[0]
            img_w = img.shape[1]
            prediction_true = np.zeros((img_h, img_w, 3))
@ -832,6 +832,23 @@ class Eynollah:
                        
                        seg = np.argmax(label_p_pred, axis=3)
                        
+                        if thresholding_for_some_classes_in_light_version:
+                            seg_not_base = label_p_pred[:,:,:,4]
+                            seg_not_base[seg_not_base>0.03] =1
+                            seg_not_base[seg_not_base<1] =0
+                            
+                            seg_line = label_p_pred[:,:,:,3]
+                            seg_line[seg_line>0.1] =1
+                            seg_line[seg_line<1] =0
+                            
+                            seg_background = label_p_pred[:,:,:,0]
+                            seg_background[seg_background>0.25] =1
+                            seg_background[seg_background<1] =0
+                            
+                            seg[seg_not_base==1]=4
+                            seg[seg_background==1]=0
+                            seg[(seg_line==1) & (seg==0)]=3
+                        
                        indexer_inside_batch = 0
                        for i_batch, j_batch in zip(list_i_s, list_j_s):
                            seg_in = seg[indexer_inside_batch,:,:]
@ -889,6 +906,22 @@ class Eynollah:
                        label_p_pred = model.predict(img_patch,verbose=0)
                        
                        seg = np.argmax(label_p_pred, axis=3)
+                        if thresholding_for_some_classes_in_light_version:
+                            seg_not_base = label_p_pred[:,:,:,4]
+                            seg_not_base[seg_not_base>0.03] =1
+                            seg_not_base[seg_not_base<1] =0
+                            
+                            seg_line = label_p_pred[:,:,:,3]
+                            seg_line[seg_line>0.1] =1
+                            seg_line[seg_line<1] =0
+                            
+                            seg_background = label_p_pred[:,:,:,0]
+                            seg_background[seg_background>0.25] =1
+                            seg_background[seg_background<1] =0
+                            
+                            seg[seg_not_base==1]=4
+                            seg[seg_background==1]=0
+                            seg[(seg_line==1) & (seg==0)]=3
                        
                        indexer_inside_batch = 0
                        for i_batch, j_batch in zip(list_i_s, list_j_s):
@ -1202,9 +1235,9 @@ class Eynollah:
        img_height_h = img.shape[0]
        img_width_h = img.shape[1]
        if not self.dir_in:
-            model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully_new if patches else self.model_region_dir_fully_np)
+            model_region, session_region = self.start_new_session_and_model(self.model_region_dir_fully if patches else self.model_region_dir_fully_np)
        else:
-            model_region = self.model_region_fl_new if patches else self.model_region_fl_np
+            model_region = self.model_region_fl if patches else self.model_region_fl_np

        if not patches:
            if self.light_version:
@ -1809,7 +1842,7 @@ class Eynollah:
        q.put(slopes_sub)
        poly.put(poly_sub)
        box_sub.put(boxes_sub_new)
-    def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
+    def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier, skip_layout_ro=False):
        self.logger.debug("enter get_regions_light_v")
        t_in = time.time()
        erosion_hurts = False
@ -1866,28 +1899,34 @@ class Eynollah:
        
        textline_mask_tot_ea = self.run_textline(img_bin)
        
+        textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h )
+        
+        if not skip_layout_ro:
            #print("inside 2 ", time.time()-t_in)
            
            #print(img_resized.shape, num_col_classifier, "num_col_classifier")
            if not self.dir_in:
-            if num_col_classifier == 1 or num_col_classifier == 2:
-                model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
-                prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region)
-            else:
+                ###if num_col_classifier == 1 or num_col_classifier == 2:
+                    ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_1_2_sp_np)
+                    ###prediction_regions_org = self.do_prediction_new_concept(False, img_resized, model_region)
+                ###else:
+                    ###model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
+                    ###prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region)
                model_region, session_region = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
-                prediction_regions_org = self.do_prediction_new_concept(True, img_bin, model_region)
+                prediction_regions_org = self.do_prediction(True, img_bin, model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
            else:
-            if num_col_classifier == 1 or num_col_classifier == 2:
-                prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2)
-            else:
-                prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region)
+                ##if num_col_classifier == 1 or num_col_classifier == 2:
+                    ##prediction_regions_org = self.do_prediction_new_concept(False, img_resized, self.model_region_1_2)
+                ##else:
+                    ##prediction_regions_org = self.do_prediction_new_concept(True, img_bin, self.model_region)
+                prediction_regions_org = self.do_prediction(True, img_bin, self.model_region, n_batch_inference=3, thresholding_for_some_classes_in_light_version=True)
            
            #print("inside 3 ", time.time()-t_in)
            #plt.imshow(prediction_regions_org[:,:,0])
            #plt.show()
                
            prediction_regions_org = resize_image(prediction_regions_org,img_height_h, img_width_h )
-        textline_mask_tot_ea = resize_image(textline_mask_tot_ea,img_height_h, img_width_h )
+            
            img_bin = resize_image(img_bin,img_height_h, img_width_h )
            
            prediction_regions_org=prediction_regions_org[:,:,0]
@ -1949,6 +1988,9 @@ class Eynollah:
            text_regions_p_true = cv2.fillPoly(text_regions_p_true, pts = polygons_of_only_texts, color=(1,1,1))
            #print("inside 4 ", time.time()-t_in)
            return text_regions_p_true, erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin
+        else:
+            img_bin = resize_image(img_bin,img_height_h, img_width_h )
+            return None, erosion_hurts, None, textline_mask_tot_ea, img_bin

    def get_regions_from_xy_2models(self,img,is_image_enhanced, num_col_classifier):
        self.logger.debug("enter get_regions_from_xy_2models")
@ -2392,8 +2434,6 @@ class Eynollah:
                ref_point += len(id_of_texts)

            order_of_texts_tot = []
-            print(len(contours_only_text_parent),'contours_only_text_parent')
-            print(len(order_by_con_main),'order_by_con_main')
            
            for tj1 in range(len(contours_only_text_parent)):
                order_of_texts_tot.append(int(order_by_con_main[tj1]))
@ -2768,6 +2808,28 @@ class Eynollah:
            num_col = None
        #print("inside graphics 3 ", time.time() - t_in_gr)
        return num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea, img_bin_light
+    
+    def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, img_bin_light):
+        """Crop the two given arrays to the region reported by ``self.extract_page()``.
+
+        This method performs no region/layout prediction itself: it only
+        calls ``self.imread`` and ``self.extract_page`` and slices its inputs.
+
+        Args:
+            textline_mask_tot_ea: 2-D-indexable array; sliced by rows
+                ``page_coord[0]:page_coord[1]`` and columns
+                ``page_coord[2]:page_coord[3]``.
+                (presumably the textline mask at full image size -- TODO confirm)
+            img_bin_light: array sliced with the same row/column bounds.
+                (presumably the binarized image -- TODO confirm)
+
+        Returns:
+            Tuple ``(page_coord, image_page, textline_mask_tot_ea,
+            img_bin_light, cont_page)`` where ``image_page``, ``page_coord``
+            and ``cont_page`` are passed through unchanged from
+            ``self.extract_page()`` and the two arrays are the cropped inputs.
+        """
+        
+        # NOTE(review): the two commented-out prints below refer to
+        # text_regions_p_1 and erosion_hurts, neither of which is a parameter
+        # or local of this method.
+        #print(text_regions_p_1.shape, 'text_regions_p_1 shape run graphics')
+        #print(erosion_hurts, 'erosion_hurts')
+        # Start time; only referenced by the commented-out timing print below.
+        t_in_gr = time.time()
+        img_g = self.imread(grayscale=True, uint8=True)
+
+        # Build a 3-channel uint8 array with the grayscale image copied into
+        # every channel.
+        # NOTE(review): img_g3 is never read afterwards and is not returned,
+        # so img_g and img_g3 appear to be unused work in this method.
+        img_g3 = np.zeros((img_g.shape[0], img_g.shape[1], 3))
+        img_g3 = img_g3.astype(np.uint8)
+        img_g3[:, :, 0] = img_g[:, :]
+        img_g3[:, :, 1] = img_g[:, :]
+        img_g3[:, :, 2] = img_g[:, :]
+
+        image_page, page_coord, cont_page = self.extract_page()
+        #print("inside graphics 1 ", time.time() - t_in_gr)
+        
+        # page_coord is used as (row_start, row_end, col_start, col_end).
+        textline_mask_tot_ea = textline_mask_tot_ea[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
+        
+        # Same crop for the second array, so both outputs share one extent.
+        img_bin_light = img_bin_light[page_coord[0] : page_coord[1], page_coord[2] : page_coord[3]]
+        
+        return  page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page
    def run_graphics_and_columns(self, text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts):
        t_in_gr = time.time()
        img_g = self.imread(grayscale=True, uint8=True)
@ -3633,6 +3695,8 @@ class Eynollah:
        """
        self.logger.debug("enter run")
        
+        skip_layout_ro = True
+
        t0_tot = time.time()

        if not self.dir_in:
@ -3649,6 +3713,8 @@ class Eynollah:
            self.logger.info("Enhancing took %.1fs ", time.time() - t0)
            #print("text region early -1 in %.1fs", time.time() - t0)
            t1 = time.time()
+            
+            if not skip_layout_ro:
                if self.light_version:
                    text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
                    #print("text region early -2 in %.1fs", time.time() - t0)
@ -3929,13 +3995,6 @@ class Eynollah:
                        else:
                            boxes_d, peaks_neg_tot_tables_d = return_boxes_of_images_by_order_of_reading_new(splitter_y_new_d, regions_without_separators_d, matrix_of_lines_ch_d, num_col_classifier, erosion_hurts, self.tables, self.right2left)     

-            #print(boxes_d,'boxes_d')
-            #img_once = np.zeros((textline_mask_tot_d.shape[0],textline_mask_tot_d.shape[1]))
-            #for box_i in boxes_d:
-                #img_once[int(box_i[2]):int(box_i[3]),int(box_i[0]):int(box_i[1]) ] =1
-            #plt.imshow(img_once)
-            #plt.show()
-            #print(np.unique(img_once),'img_once')
                if self.plotter:
                    self.plotter.write_images_into_directory(polygons_of_images, image_page)
                t_order = time.time()
@ -4041,6 +4100,32 @@ class Eynollah:
                    if not self.dir_in:
                        return pcgts
                #print("text region early 7 in %.1fs", time.time() - t0)
+            else:
+                _ ,_, _, textline_mask_tot_ea, img_bin_light = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier, skip_layout_ro=skip_layout_ro)
+                
+                page_coord, image_page, textline_mask_tot_ea, img_bin_light, cont_page = self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, img_bin_light)
+                
+                cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
+                all_found_textline_polygons = filter_contours_area_of_image(textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001)
+                
+                all_found_textline_polygons=[ all_found_textline_polygons ]
+                order_text_new = [0]
+                slopes =[0]
+                id_of_texts_tot =['region_0001']
+                
+                polygons_of_images = []
+                slopes_marginals = []
+                polygons_of_marginals = []
+                all_found_textline_polygons_marginals = []
+                all_box_coord_marginals = []
+                polygons_lines_xml = []
+                contours_tables = []
+                ocr_all_textlines = None
+                
+                pcgts = self.writer.build_pagexml_no_full_layout(cont_page, page_coord, order_text_new, id_of_texts_tot, all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines)
+                if not self.dir_in:
+                    return pcgts
+            
            if self.dir_in:
                self.writer.write_pagexml(pcgts)
            #self.logger.info("Job done in %.1fs", time.time() - t0)