Reading order detection on XML with layout; the result is written to an output directory under the same file name

2025-10-09 07:40:00 +02:00 · 2024-05-29 11:18:35 +02:00 · 2024-05-29 11:18:35 +02:00 · f6abefb0a8
commit f6abefb0a8
parent 2e7c69f2ac
2 changed files with 99 additions and 20 deletions
--- a/inference.py
+++ b/inference.py
@ -16,6 +16,7 @@ import click
 import json
 from tensorflow.python.keras import backend as tensorflow_backend
 import xml.etree.ElementTree as ET
+import matplotlib.pyplot as plt


 with warnings.catch_warnings():
@ -27,7 +28,7 @@ Tool to load model and predict for given image.
 """

 class sbb_predict:
-    def __init__(self,image, model, task, config_params_model, patches, save, ground_truth, xml_file):
+    def __init__(self,image, model, task, config_params_model, patches, save, ground_truth, xml_file, out):
        self.image=image
        self.patches=patches
        self.save=save
@ -36,6 +37,7 @@ class sbb_predict:
        self.task=task
        self.config_params_model=config_params_model
        self.xml_file = xml_file
+        self.out = out

    def resize_image(self,img_in,input_height,input_width):
        return cv2.resize( img_in, ( input_width,input_height) ,interpolation=cv2.INTER_NEAREST)
@ -236,16 +238,18 @@ class sbb_predict:
            img_height = self.config_params_model['input_height']
            img_width = self.config_params_model['input_width']
            
-            tree_xml, root_xml, file_name, id_paragraph, id_header, co_text_paragraph, co_text_header, tot_region_ref, x_len, y_len, index_tot_regions, img_poly = read_xml(self.xml_file)
+            tree_xml, root_xml, bb_coord_printspace, file_name, id_paragraph, id_header, co_text_paragraph, co_text_header, tot_region_ref, x_len, y_len, index_tot_regions, img_poly = read_xml(self.xml_file)
            _, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, _ = find_new_features_of_contours(co_text_header)
            
            img_header_and_sep = np.zeros((y_len,x_len), dtype='uint8')
+            

            for j in range(len(cy_main)):
                img_header_and_sep[int(y_max_main[j]):int(y_max_main[j])+12,int(x_min_main[j]):int(x_max_main[j]) ] = 1
                
            co_text_all = co_text_paragraph + co_text_header
            id_all_text = id_paragraph + id_header
+            

            ##texts_corr_order_index  = [index_tot_regions[tot_region_ref.index(i)] for i in id_all_text ]
            ##texts_corr_order_index_int = [int(x) for x in texts_corr_order_index]
@ -253,8 +257,9 @@ class sbb_predict:
            
            min_area = 0
            max_area = 1
+            

-            co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area)
+            ##co_text_all, texts_corr_order_index_int = filter_contours_area_of_image(img_poly, co_text_all, texts_corr_order_index_int, max_area, min_area)
            
            labels_con = np.zeros((y_len,x_len,len(co_text_all)),dtype='uint8')
            for i in range(len(co_text_all)):
@ -262,6 +267,18 @@ class sbb_predict:
                img_label=cv2.fillPoly(img_label, pts =[co_text_all[i]], color=(1,1,1))
                labels_con[:,:,i] = img_label[:,:,0]
                
+            if bb_coord_printspace:
+                #bb_coord_printspace[x,y,w,h,_,_]
+                x = bb_coord_printspace[0]
+                y = bb_coord_printspace[1]
+                w = bb_coord_printspace[2]
+                h = bb_coord_printspace[3]
+                labels_con = labels_con[y:y+h, x:x+w, :]
+                img_poly = img_poly[y:y+h, x:x+w, :]
+                img_header_and_sep = img_header_and_sep[y:y+h, x:x+w]
+                
+
+                
            img3= np.copy(img_poly)
            labels_con = resize_image(labels_con, img_height, img_width)

@ -347,9 +364,11 @@ class sbb_predict:
                    tot_counter = tot_counter+1
                        
                starting_list_of_regions, index_update = update_list_and_return_first_with_length_bigger_than_one(index_update, i, pr_list, post_list,starting_list_of_regions)
-                
+            
+            
            index_sort = [i[0] for i in starting_list_of_regions ]
            
+            id_all_text = np.array(id_all_text)[index_sort]
            
            alltags=[elem.tag for elem in root_xml.iter()]
            
@ -389,19 +408,17 @@ class sbb_predict:
            for index, id_text in enumerate(id_all_text):
                new_element_2 = ET.SubElement(ro_subelement2, 'RegionRefIndexed')
                new_element_2.set('regionRef', id_all_text[index])
-                new_element_2.set('index', str(index_sort[index]))
+                new_element_2.set('index', str(index))
            
-            if link+'PrintSpace' in alltags:
+            if (link+'PrintSpace' in alltags) or  (link+'Border' in alltags):
                page_element.insert(1, ro_subelement)
            else:
                page_element.insert(0, ro_subelement)
            
-            #page_element[0].append(new_element)
-            #root_xml.append(new_element)
            alltags=[elem.tag for elem in root_xml.iter()]
            
            ET.register_namespace("",name_space)
-            tree_xml.write('library2.xml',xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
+            tree_xml.write(os.path.join(self.out, file_name+'.xml'),xml_declaration=True,method='xml',encoding="utf8",default_namespace=None)
            #tree_xml.write('library2.xml')
            
        else:
@ -545,6 +562,12 @@ class sbb_predict:
    help="image filename",
    type=click.Path(exists=True, dir_okay=False),
 )
+@click.option(
+    "--out",
+    "-o",
+    help="output directory where xml with detected reading order will be written.",
+    type=click.Path(exists=True, file_okay=False),
+)
@click.option(
    "--patches/--no-patches",
    "-p/-nop",
@ -573,7 +596,7 @@ class sbb_predict:
    "-xml",
    help="xml file with layout coordinates that reading order detection will be implemented on. The result will be written in the same xml file.",
 )
-def main(image, model, patches, save, ground_truth, xml_file):
+def main(image, model, patches, save, ground_truth, xml_file, out):
    with open(os.path.join(model,'config.json')) as f:
        config_params_model = json.load(f)
    task = config_params_model['task']
@ -581,7 +604,7 @@ def main(image, model, patches, save, ground_truth, xml_file):
        if not save:
            print("Error: You used one of segmentation or binarization task but not set -s, you need a filename to save visualized output with -s")
            sys.exit(1)
-    x=sbb_predict(image, model, task, config_params_model, patches, save, ground_truth, xml_file)
+    x=sbb_predict(image, model, task, config_params_model, patches, save, ground_truth, xml_file, out)
    x.run()

 if __name__=="__main__":