mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 19:59:56 +02:00
outfactor serialize_lines_in_region
This commit is contained in:
parent
4a5c99008a
commit
e11d9adfea
1 changed files with 84 additions and 191 deletions
|
@ -1404,6 +1404,83 @@ class eynollah:
|
|||
poly.put(poly_sub)
|
||||
box_sub.put(boxes_sub_new)
|
||||
|
||||
def serialize_lines_in_region(self, textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes=None, id_indexer_l=0):
    """
    Append one ``TextLine`` element per text-line polygon of a region.

    For every polygon in ``all_found_texline_polygons[region_idx]`` a
    ``TextLine`` child is added to *textregion*, carrying a ``Coords`` child
    whose ``points`` attribute is the space-separated ``x,y`` list of the
    polygon, and a ``TextEquiv/Unicode`` child whose text is a single space.

    Coordinates are computed per point as
    ``int((value + offsets) / self.scale_x)`` (resp. ``self.scale_y``):

    * ``self.curved_line`` false: the region box offset and the page offset
      are added, and negative results are clamped to 0.
    * ``self.curved_line`` true and ``abs(slopes[region_idx]) <= 45``: only
      the page offset is added, no clamping.
    * ``self.curved_line`` true and ``abs(slopes[region_idx]) > 45``: box and
      page offsets are added, no clamping.

    Parameters:
        textregion: parent XML element the ``TextLine`` elements are added to.
        all_found_texline_polygons: per region, a sequence of polygons; each
            point is either a flat ``(x, y)`` pair or a one-element wrapper
            ``((x, y),)``.
        region_idx: index of the region to serialize.
        page_coord: page offsets; index 2 is added to x, index 0 to y.
        all_box_coord: per-region box offsets; index 2 is added to x,
            index 0 to y.
        slopes: per-region slope values; only read when ``self.curved_line``
            is true (compared against 45, so presumably degrees -- confirm
            with the caller).
        id_indexer_l: number used for the id (``'l<N>'``) of the first line
            written by this call.

    Returns:
        The next unused line number. Callers that serialize several regions
        should feed it back in as *id_indexer_l* to keep the ids unique.

    Raises:
        ValueError: if ``self.curved_line`` is true and *slopes* is ``None``.
    """
    # The line counter has to come from the caller: it was previously read
    # and incremented here without ever being bound, which raised
    # UnboundLocalError on the very first text line.
    if self.curved_line and slopes is None:
        raise ValueError("slopes must be given when curved_line is enabled")

    for polygon in all_found_texline_polygons[region_idx]:
        textline = ET.SubElement(textregion, 'TextLine')
        textline.set('id', 'l' + str(id_indexer_l))
        id_indexer_l += 1
        coord = ET.SubElement(textline, 'Coords')
        texteq = ET.SubElement(textline, 'TextEquiv')
        uni = ET.SubElement(texteq, 'Unicode')
        uni.text = ' '

        point_strings = []
        for point in polygon:
            if not self.curved_line:
                add_box_offset, clamp_negative = True, True
            else:
                steepness = np.abs(slopes[region_idx])
                if steepness <= 45:
                    add_box_offset, clamp_negative = False, False
                elif steepness > 45:
                    add_box_offset, clamp_negative = True, False
                else:
                    # Neither comparison holds (e.g. NaN slope): no coordinate
                    # is written for this point, only its separator remains.
                    point_strings.append('')
                    continue

            # Points arrive either flat (x, y) or wrapped once ((x, y),).
            xy = point if len(point) == 2 else point[0]
            x_value, y_value = xy[0], xy[1]
            if add_box_offset:
                x_value = x_value + all_box_coord[region_idx][2]
                y_value = y_value + all_box_coord[region_idx][0]
            x_value = int((x_value + page_coord[2]) / self.scale_x)
            y_value = int((y_value + page_coord[0]) / self.scale_y)
            if clamp_negative:
                x_value = max(x_value, 0)
                y_value = max(y_value, 0)
            point_strings.append(str(x_value) + ',' + str(y_value))

        coord.set('points', ' '.join(point_strings))

    return id_indexer_l
|
||||
|
||||
def write_into_page_xml_full(self, contours, contours_h, page_coord, dir_of_image, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals):
|
||||
|
||||
found_polygons_text_region = contours
|
||||
|
@ -1460,110 +1537,20 @@ class eynollah:
|
|||
points_co=''
|
||||
for lmm in range(len(found_polygons_text_region[mm])):
|
||||
if len(found_polygons_text_region[mm][lmm])==2:
|
||||
points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][0] +page_coord[2])/self.scale_x ) )
|
||||
points_co=points_co+str(int((found_polygons_text_region[mm][lmm][0] + page_coord[2]) / self.scale_x))
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][1] +page_coord[0])/self.scale_y ) )
|
||||
points_co=points_co+str(int((found_polygons_text_region[mm][lmm][1] + page_coord[0]) / self.scale_y))
|
||||
else:
|
||||
points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][0] +page_coord[2])/self.scale_x) )
|
||||
points_co=points_co+str(int((found_polygons_text_region[mm][lmm][0][0] + page_coord[2]) / self.scale_x))
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][1] +page_coord[0])/self.scale_y) )
|
||||
points_co=points_co+str(int((found_polygons_text_region[mm][lmm][0][1] + page_coord[0]) / self.scale_y))
|
||||
|
||||
if lmm<(len(found_polygons_text_region[mm])-1):
|
||||
points_co=points_co+' '
|
||||
#print(points_co)
|
||||
coord_text.set('points',points_co)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
for j in range(len(all_found_texline_polygons[mm])):
|
||||
|
||||
textline=ET.SubElement(textregion, 'TextLine')
|
||||
|
||||
textline.set('id','l'+str(id_indexer_l))
|
||||
|
||||
id_indexer_l+=1
|
||||
|
||||
|
||||
coord = ET.SubElement(textline, 'Coords')
|
||||
|
||||
texteq=ET.SubElement(textline, 'TextEquiv')
|
||||
|
||||
uni=ET.SubElement(texteq, 'Unicode')
|
||||
uni.text = ' '
|
||||
|
||||
#points = ET.SubElement(coord, 'Points')
|
||||
|
||||
points_co=''
|
||||
for l in range(len(all_found_texline_polygons[mm][j])):
|
||||
#point = ET.SubElement(coord, 'Point')
|
||||
|
||||
|
||||
if not self.curved_line:
|
||||
#point.set('x',str(found_polygons[j][l][0]))
|
||||
#point.set('y',str(found_polygons[j][l][1]))
|
||||
if len(all_found_texline_polygons[mm][j][l])==2:
|
||||
textline_x_coord=int( (all_found_texline_polygons[mm][j][l][0]
|
||||
+all_box_coord[mm][2]+page_coord[2])/self.scale_x)
|
||||
textline_y_coord=int( (all_found_texline_polygons[mm][j][l][1]
|
||||
+all_box_coord[mm][0]+page_coord[0])/self.scale_y)
|
||||
|
||||
if textline_x_coord<0:
|
||||
textline_x_coord=0
|
||||
if textline_y_coord<0:
|
||||
textline_y_coord=0
|
||||
points_co=points_co+str( textline_x_coord )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( textline_y_coord )
|
||||
else:
|
||||
|
||||
textline_x_coord=int( ( all_found_texline_polygons[mm][j][l][0][0]
|
||||
+all_box_coord[mm][2]+page_coord[2])/self.scale_x )
|
||||
textline_y_coord=int( ( all_found_texline_polygons[mm][j][l][0][1]
|
||||
+all_box_coord[mm][0]+page_coord[0])/self.scale_y)
|
||||
|
||||
if textline_x_coord<0:
|
||||
textline_x_coord=0
|
||||
if textline_y_coord<0:
|
||||
textline_y_coord=0
|
||||
|
||||
points_co=points_co+str( textline_x_coord )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( textline_y_coord )
|
||||
|
||||
if (self.curved_line) and np.abs(slopes[mm]) <= 45 :
|
||||
if len(all_found_texline_polygons[mm][j][l])==2:
|
||||
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][0]
|
||||
+page_coord[2])/self.scale_x) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][1]
|
||||
+page_coord[0])/self.scale_y) )
|
||||
else:
|
||||
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][0]
|
||||
+page_coord[2])/self.scale_x ) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][1]
|
||||
+page_coord[0])/self.scale_y) )
|
||||
elif (self.curved_line) and np.abs(slopes[mm]) > 45 :
|
||||
if len(all_found_texline_polygons[mm][j][l])==2:
|
||||
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][0]
|
||||
+all_box_coord[mm][2]+page_coord[2])/self.scale_x) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][1]
|
||||
+all_box_coord[mm][0]+page_coord[0])/self.scale_y) )
|
||||
else:
|
||||
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][0]
|
||||
+all_box_coord[mm][2]+page_coord[2])/self.scale_x ) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][1]
|
||||
+all_box_coord[mm][0]+page_coord[0])/self.scale_y) )
|
||||
|
||||
if l<(len(all_found_texline_polygons[mm][j])-1):
|
||||
points_co=points_co+' '
|
||||
#print(points_co)
|
||||
coord.set('points',points_co)
|
||||
|
||||
self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord)
|
||||
texteqreg=ET.SubElement(textregion, 'TextEquiv')
|
||||
|
||||
unireg=ET.SubElement(texteqreg, 'Unicode')
|
||||
|
@ -1590,102 +1577,8 @@ class eynollah:
|
|||
# textregion.set('type','paragraph')
|
||||
coord_text = ET.SubElement(textregion, 'Coords')
|
||||
|
||||
points_co=''
|
||||
for lmm in range(len(found_polygons_text_region_h[mm])):
|
||||
|
||||
if len(found_polygons_text_region_h[mm][lmm])==2:
|
||||
|
||||
points_co=points_co+str( int( (found_polygons_text_region_h[mm][lmm][0] +page_coord[2])/self.scale_x ) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( (found_polygons_text_region_h[mm][lmm][1] +page_coord[0])/self.scale_y ) )
|
||||
else:
|
||||
points_co=points_co+str( int((found_polygons_text_region_h[mm][lmm][0][0] +page_coord[2])/self.scale_x) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int((found_polygons_text_region_h[mm][lmm][0][1] +page_coord[0])/self.scale_y) )
|
||||
|
||||
if lmm<(len(found_polygons_text_region_h[mm])-1):
|
||||
points_co=points_co+' '
|
||||
#print(points_co)
|
||||
coord_text.set('points',points_co)
|
||||
|
||||
|
||||
for j in range(len(all_found_texline_polygons_h[mm])):
|
||||
|
||||
textline=ET.SubElement(textregion, 'TextLine')
|
||||
|
||||
textline.set('id','l'+str(id_indexer_l))
|
||||
|
||||
id_indexer_l+=1
|
||||
|
||||
|
||||
coord = ET.SubElement(textline, 'Coords')
|
||||
|
||||
texteq=ET.SubElement(textline, 'TextEquiv')
|
||||
|
||||
uni=ET.SubElement(texteq, 'Unicode')
|
||||
uni.text = ' '
|
||||
|
||||
#points = ET.SubElement(coord, 'Points')
|
||||
|
||||
points_co=''
|
||||
for l in range(len(all_found_texline_polygons_h[mm][j])):
|
||||
#point = ET.SubElement(coord, 'Point')
|
||||
|
||||
|
||||
if not self.curved_line:
|
||||
#point.set('x',str(found_polygons[j][l][0]))
|
||||
#point.set('y',str(found_polygons[j][l][1]))
|
||||
if len(all_found_texline_polygons_h[mm][j][l])==2:
|
||||
|
||||
textline_x_coord=int( (all_found_texline_polygons_h[mm][j][l][0]
|
||||
+all_box_coord_h[mm][2]+page_coord[2])/self.scale_x)
|
||||
textline_y_coord=int( (all_found_texline_polygons_h[mm][j][l][1]
|
||||
+all_box_coord_h[mm][0]+page_coord[0])/self.scale_y)
|
||||
|
||||
if textline_x_coord<0:
|
||||
textline_x_coord=0
|
||||
if textline_y_coord<0:
|
||||
textline_y_coord=0
|
||||
|
||||
points_co=points_co+str( textline_x_coord )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( textline_y_coord )
|
||||
else:
|
||||
|
||||
|
||||
textline_x_coord=int( ( all_found_texline_polygons_h[mm][j][l][0][0]
|
||||
+all_box_coord_h[mm][2]+page_coord[2])/self.scale_x )
|
||||
textline_y_coord=int( ( all_found_texline_polygons_h[mm][j][l][0][1]
|
||||
+all_box_coord_h[mm][0]+page_coord[0])/self.scale_y)
|
||||
|
||||
if textline_x_coord<0:
|
||||
textline_x_coord=0
|
||||
if textline_y_coord<0:
|
||||
textline_y_coord=0
|
||||
|
||||
points_co=points_co+str( textline_x_coord )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( textline_y_coord)
|
||||
|
||||
if self.curved_line:
|
||||
if len(all_found_texline_polygons_h[mm][j][l])==2:
|
||||
points_co=points_co+str( int( (all_found_texline_polygons_h[mm][j][l][0]
|
||||
+page_coord[2])/self.scale_x) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( (all_found_texline_polygons_h[mm][j][l][1]
|
||||
+page_coord[0])/self.scale_y) )
|
||||
else:
|
||||
points_co=points_co+str( int( ( all_found_texline_polygons_h[mm][j][l][0][0]
|
||||
+page_coord[2])/self.scale_x ) )
|
||||
points_co=points_co+','
|
||||
points_co=points_co+str( int( ( all_found_texline_polygons_h[mm][j][l][0][1]
|
||||
+page_coord[0])/self.scale_y) )
|
||||
|
||||
if l<(len(all_found_texline_polygons_h[mm][j])-1):
|
||||
points_co=points_co+' '
|
||||
#print(points_co)
|
||||
coord.set('points',points_co)
|
||||
|
||||
self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord, slopes)
|
||||
texteqreg=ET.SubElement(textregion, 'TextEquiv')
|
||||
|
||||
unireg=ET.SubElement(texteqreg, 'Unicode')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue