mirror of
				https://github.com/qurator-spk/eynollah.git
				synced 2025-11-04 11:44:15 +01:00 
			
		
		
		
	Text region coordinates are now written correctly to the XML output when the skip_layout_and_reading_order option is used
This commit is contained in:
		
							parent
							
								
									83211ae684
								
							
						
					
					
						commit
						21ec4fbfb5
					
				
					 2 changed files with 21 additions and 11 deletions
				
			
		| 
						 | 
					@ -4333,7 +4333,7 @@ class Eynollah:
 | 
				
			||||||
                cont_page, page_coord, order_text_new, id_of_texts_tot,
 | 
					                cont_page, page_coord, order_text_new, id_of_texts_tot,
 | 
				
			||||||
                all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
 | 
					                all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals,
 | 
				
			||||||
                all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
 | 
					                all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals,
 | 
				
			||||||
                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions)
 | 
					                cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, conf_contours_textregions, self.skip_layout_and_reading_order)
 | 
				
			||||||
            return pcgts
 | 
					            return pcgts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        #print("text region early -1 in %.1fs", time.time() - t0)
 | 
					        #print("text region early -1 in %.1fs", time.time() - t0)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -168,7 +168,7 @@ class EynollahXmlWriter():
 | 
				
			||||||
        with open(self.output_filename, 'w') as f:
 | 
					        with open(self.output_filename, 'w') as f:
 | 
				
			||||||
            f.write(to_xml(pcgts))
 | 
					            f.write(to_xml(pcgts))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion):
 | 
					    def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_textline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page, polygons_lines_to_be_written_in_xml, found_polygons_tables, ocr_all_textlines, conf_contours_textregion, skip_layout_reading_order=False):
 | 
				
			||||||
        self.logger.debug('enter build_pagexml_no_full_layout')
 | 
					        self.logger.debug('enter build_pagexml_no_full_layout')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # create the file structure
 | 
					        # create the file structure
 | 
				
			||||||
| 
						 | 
					@ -184,7 +184,7 @@ class EynollahXmlWriter():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for mm in range(len(found_polygons_text_region)):
 | 
					        for mm in range(len(found_polygons_text_region)):
 | 
				
			||||||
            textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
 | 
					            textregion = TextRegionType(id=counter.next_region_id, type_='paragraph',
 | 
				
			||||||
                    Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord), conf=conf_contours_textregion[mm]),
 | 
					                    Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord, skip_layout_reading_order), conf=conf_contours_textregion[mm]),
 | 
				
			||||||
                    )
 | 
					                    )
 | 
				
			||||||
            #textregion.set_conf(conf_contours_textregion[mm])
 | 
					            #textregion.set_conf(conf_contours_textregion[mm])
 | 
				
			||||||
            page.add_TextRegion(textregion)
 | 
					            page.add_TextRegion(textregion)
 | 
				
			||||||
| 
						 | 
					@ -303,10 +303,20 @@ class EynollahXmlWriter():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return pcgts
 | 
					        return pcgts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def calculate_polygon_coords(self, contour, page_coord):
 | 
					    def calculate_polygon_coords(self, contour, page_coord, skip_layout_reading_order=False):
 | 
				
			||||||
        self.logger.debug('enter calculate_polygon_coords')
 | 
					        self.logger.debug('enter calculate_polygon_coords')
 | 
				
			||||||
        coords = ''
 | 
					        coords = ''
 | 
				
			||||||
        for value_bbox in contour:
 | 
					        for value_bbox in contour:
 | 
				
			||||||
 | 
					            if skip_layout_reading_order:
 | 
				
			||||||
 | 
					                if len(value_bbox) == 2:
 | 
				
			||||||
 | 
					                    coords += str(int((value_bbox[0]) / self.scale_x))
 | 
				
			||||||
 | 
					                    coords += ','
 | 
				
			||||||
 | 
					                    coords += str(int((value_bbox[1]) / self.scale_y))
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    coords += str(int((value_bbox[0][0]) / self.scale_x))
 | 
				
			||||||
 | 
					                    coords += ','
 | 
				
			||||||
 | 
					                    coords += str(int((value_bbox[0][1]) / self.scale_y))
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
                if len(value_bbox) == 2:
 | 
					                if len(value_bbox) == 2:
 | 
				
			||||||
                    coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
 | 
					                    coords += str(int((value_bbox[0] + page_coord[2]) / self.scale_x))
 | 
				
			||||||
                    coords += ','
 | 
					                    coords += ','
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue