mirror of
				https://github.com/qurator-spk/eynollah.git
				synced 2025-10-31 09:44:17 +01:00 
			
		
		
		
	Merge pull request #68 from mikegerber/fix/remove-spurious-textequivs
🧹 Don't produce spurious TextEquiv elements.
			
			
This commit is contained in:
		
						commit
						6a9d5d2076
					
				
					 2 changed files with 2 additions and 9 deletions
				
			
		|  | @ -21,7 +21,6 @@ from ocrd_models.ocrd_page import ( | ||||||
|     RegionRefType, |     RegionRefType, | ||||||
|     SeparatorRegionType, |     SeparatorRegionType, | ||||||
|     TableRegionType, |     TableRegionType, | ||||||
|     TextEquivType, |  | ||||||
|     TextLineType, |     TextLineType, | ||||||
|     TextRegionType, |     TextRegionType, | ||||||
|     UnorderedGroupIndexedType, |     UnorderedGroupIndexedType, | ||||||
|  |  | ||||||
|  | @ -10,7 +10,6 @@ from ocrd_utils import getLogger | ||||||
| from ocrd_models.ocrd_page import ( | from ocrd_models.ocrd_page import ( | ||||||
|         BorderType, |         BorderType, | ||||||
|         CoordsType, |         CoordsType, | ||||||
|         TextEquivType, |  | ||||||
|         PcGtsType, |         PcGtsType, | ||||||
|         TextLineType, |         TextLineType, | ||||||
|         TextRegionType, |         TextRegionType, | ||||||
|  | @ -59,7 +58,6 @@ class EynollahXmlWriter(): | ||||||
|             coords = CoordsType() |             coords = CoordsType() | ||||||
|             textline = TextLineType(id=counter.next_line_id, Coords=coords) |             textline = TextLineType(id=counter.next_line_id, Coords=coords) | ||||||
|             marginal_region.add_TextLine(textline) |             marginal_region.add_TextLine(textline) | ||||||
|             textline.add_TextEquiv(TextEquivType(Unicode='')) |  | ||||||
|             points_co = '' |             points_co = '' | ||||||
|             for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])): |             for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])): | ||||||
|                 if not self.curved_line: |                 if not self.curved_line: | ||||||
|  | @ -98,7 +96,7 @@ class EynollahXmlWriter(): | ||||||
|         self.logger.debug('enter serialize_lines_in_region') |         self.logger.debug('enter serialize_lines_in_region') | ||||||
|         for j in range(len(all_found_texline_polygons[region_idx])): |         for j in range(len(all_found_texline_polygons[region_idx])): | ||||||
|             coords = CoordsType() |             coords = CoordsType() | ||||||
|             textline = TextLineType(id=counter.next_line_id, Coords=coords, TextEquiv=[TextEquivType(index=0, Unicode='')]) |             textline = TextLineType(id=counter.next_line_id, Coords=coords) | ||||||
|             text_region.add_TextLine(textline) |             text_region.add_TextLine(textline) | ||||||
|             region_bboxes = all_box_coord[region_idx] |             region_bboxes = all_box_coord[region_idx] | ||||||
|             points_co = '' |             points_co = '' | ||||||
|  | @ -158,7 +156,7 @@ class EynollahXmlWriter(): | ||||||
|         for mm in range(len(found_polygons_text_region)): |         for mm in range(len(found_polygons_text_region)): | ||||||
|             textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', |             textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', | ||||||
|                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)), |                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord)), | ||||||
|                     TextEquiv=[TextEquivType(index=0, Unicode='')]) |                     ) | ||||||
|             page.add_TextRegion(textregion) |             page.add_TextRegion(textregion) | ||||||
|             self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter) |             self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter) | ||||||
| 
 | 
 | ||||||
|  | @ -217,7 +215,6 @@ class EynollahXmlWriter(): | ||||||
| 
 | 
 | ||||||
|         for mm in range(len(found_polygons_text_region)): |         for mm in range(len(found_polygons_text_region)): | ||||||
|             textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', |             textregion = TextRegionType(id=counter.next_region_id, type_='paragraph', | ||||||
|                     TextEquiv=[TextEquivType(index=0, Unicode='')], |  | ||||||
|                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))) |                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region[mm], page_coord))) | ||||||
|             page.add_TextRegion(textregion) |             page.add_TextRegion(textregion) | ||||||
|             self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter) |             self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, counter) | ||||||
|  | @ -225,21 +222,18 @@ class EynollahXmlWriter(): | ||||||
|         self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) |         self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h)) | ||||||
|         for mm in range(len(found_polygons_text_region_h)): |         for mm in range(len(found_polygons_text_region_h)): | ||||||
|             textregion = TextRegionType(id=counter.next_region_id, type_='header', |             textregion = TextRegionType(id=counter.next_region_id, type_='header', | ||||||
|                     TextEquiv=[TextEquivType(index=0, Unicode='')], |  | ||||||
|                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) |                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_text_region_h[mm], page_coord))) | ||||||
|             page.add_TextRegion(textregion) |             page.add_TextRegion(textregion) | ||||||
|             self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter) |             self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes_h, counter) | ||||||
| 
 | 
 | ||||||
|         for mm in range(len(found_polygons_marginals)): |         for mm in range(len(found_polygons_marginals)): | ||||||
|             marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', |             marginal = TextRegionType(id=counter.next_region_id, type_='marginalia', | ||||||
|                     TextEquiv=[TextEquivType(index=0, Unicode='')], |  | ||||||
|                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) |                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_marginals[mm], page_coord))) | ||||||
|             page.add_TextRegion(marginal) |             page.add_TextRegion(marginal) | ||||||
|             self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) |             self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, counter) | ||||||
| 
 | 
 | ||||||
|         for mm in range(len(found_polygons_drop_capitals)): |         for mm in range(len(found_polygons_drop_capitals)): | ||||||
|             page.add_TextRegion(TextRegionType(id=counter.next_region_id, type_='drop-capital', |             page.add_TextRegion(TextRegionType(id=counter.next_region_id, type_='drop-capital', | ||||||
|                     TextEquiv=[TextEquivType(index=0, Unicode='')], |  | ||||||
|                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))) |                     Coords=CoordsType(points=self.calculate_polygon_coords(found_polygons_drop_capitals[mm], page_coord)))) | ||||||
| 
 | 
 | ||||||
|         for mm in range(len(found_polygons_text_region_img)): |         for mm in range(len(found_polygons_text_region_img)): | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue