outfactor serialize_lines_in_region

pull/19/head
Konstantin Baierer 4 years ago
parent 4a5c99008a
commit e11d9adfea

@ -1404,6 +1404,83 @@ class eynollah:
poly.put(poly_sub) poly.put(poly_sub)
box_sub.put(boxes_sub_new) box_sub.put(boxes_sub_new)
def serialize_lines_in_region(self, textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, id_indexer_l=0):
    """Append one ``TextLine`` element per text line of a region to ``textregion``.

    For every polygon in ``all_found_texline_polygons[region_idx]`` a
    ``TextLine`` child is created with an ``id`` of the form ``l<N>``, a
    ``Coords`` child whose ``points`` attribute holds space-separated
    ``x,y`` pairs, and a ``TextEquiv/Unicode`` child containing a single
    space.

    Args:
        textregion: Parent XML element that receives the ``TextLine`` children.
        all_found_texline_polygons: Per-region sequences of line polygons.
            Each point is either a flat pair ``(x, y)`` or wrapped one level
            deeper as ``[[x, y]]``.
        region_idx: Index of the region to serialize.
        page_coord: Page offsets; index 2 is added to x and index 0 to y.
        all_box_coord: Per-region box offsets; index 2 is added to x and
            index 0 to y whenever the box offset applies.
        slopes: Per-region slopes. Only read when ``self.curved_line`` is
            true; the box offset is skipped when the absolute slope is
            at most 45.
        id_indexer_l: Number used for the first ``TextLine`` id. Pass the
            value returned by the previous call to keep ids unique across
            regions.

    Returns:
        The next unused line id number, i.e. ``id_indexer_l`` plus the
        number of lines written.
    """
    for line_polygon in all_found_texline_polygons[region_idx]:
        textline = ET.SubElement(textregion, 'TextLine')
        textline.set('id', 'l' + str(id_indexer_l))
        id_indexer_l += 1
        coord = ET.SubElement(textline, 'Coords')
        texteq = ET.SubElement(textline, 'TextEquiv')
        uni = ET.SubElement(texteq, 'Unicode')
        uni.text = ' '

        point_strings = []
        for point in line_polygon:
            # Accept both flat (x, y) points and points nested as [[x, y]].
            if len(point) == 2:
                x, y = point[0], point[1]
            else:
                x, y = point[0][0], point[0][1]

            # In curved-line mode, regions whose slope is at most 45 carry
            # coordinates that only need the page offset; everything else
            # additionally needs the offset of the region's box.
            # `slopes` is deliberately not touched outside curved-line mode.
            if self.curved_line and np.abs(slopes[region_idx]) <= 45:
                x_abs = x + page_coord[2]
                y_abs = y + page_coord[0]
            else:
                x_abs = x + all_box_coord[region_idx][2] + page_coord[2]
                y_abs = y + all_box_coord[region_idx][0] + page_coord[0]

            x_out = int(x_abs / self.scale_x)
            y_out = int(y_abs / self.scale_y)

            # Negative coordinates are clamped to 0 only in straight-line mode.
            if not self.curved_line:
                x_out = max(x_out, 0)
                y_out = max(y_out, 0)

            point_strings.append(str(x_out) + ',' + str(y_out))

        coord.set('points', ' '.join(point_strings))

    return id_indexer_l
def write_into_page_xml_full(self, contours, contours_h, page_coord, dir_of_image, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals): def write_into_page_xml_full(self, contours, contours_h, page_coord, dir_of_image, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals):
found_polygons_text_region = contours found_polygons_text_region = contours
@ -1460,110 +1537,20 @@ class eynollah:
points_co='' points_co=''
for lmm in range(len(found_polygons_text_region[mm])): for lmm in range(len(found_polygons_text_region[mm])):
if len(found_polygons_text_region[mm][lmm])==2: if len(found_polygons_text_region[mm][lmm])==2:
points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][0] +page_coord[2])/self.scale_x ) ) points_co=points_co+str(int((found_polygons_text_region[mm][lmm][0] + page_coord[2]) / self.scale_x))
points_co=points_co+',' points_co=points_co+','
points_co=points_co+str( int( (found_polygons_text_region[mm][lmm][1] +page_coord[0])/self.scale_y ) ) points_co=points_co+str(int((found_polygons_text_region[mm][lmm][1] + page_coord[0]) / self.scale_y))
else: else:
points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][0] +page_coord[2])/self.scale_x) ) points_co=points_co+str(int((found_polygons_text_region[mm][lmm][0][0] + page_coord[2]) / self.scale_x))
points_co=points_co+',' points_co=points_co+','
points_co=points_co+str( int((found_polygons_text_region[mm][lmm][0][1] +page_coord[0])/self.scale_y) ) points_co=points_co+str(int((found_polygons_text_region[mm][lmm][0][1] + page_coord[0]) / self.scale_y))
if lmm<(len(found_polygons_text_region[mm])-1): if lmm<(len(found_polygons_text_region[mm])-1):
points_co=points_co+' ' points_co=points_co+' '
#print(points_co) #print(points_co)
coord_text.set('points',points_co) coord_text.set('points',points_co)
self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord)
for j in range(len(all_found_texline_polygons[mm])):
textline=ET.SubElement(textregion, 'TextLine')
textline.set('id','l'+str(id_indexer_l))
id_indexer_l+=1
coord = ET.SubElement(textline, 'Coords')
texteq=ET.SubElement(textline, 'TextEquiv')
uni=ET.SubElement(texteq, 'Unicode')
uni.text = ' '
#points = ET.SubElement(coord, 'Points')
points_co=''
for l in range(len(all_found_texline_polygons[mm][j])):
#point = ET.SubElement(coord, 'Point')
if not self.curved_line:
#point.set('x',str(found_polygons[j][l][0]))
#point.set('y',str(found_polygons[j][l][1]))
if len(all_found_texline_polygons[mm][j][l])==2:
textline_x_coord=int( (all_found_texline_polygons[mm][j][l][0]
+all_box_coord[mm][2]+page_coord[2])/self.scale_x)
textline_y_coord=int( (all_found_texline_polygons[mm][j][l][1]
+all_box_coord[mm][0]+page_coord[0])/self.scale_y)
if textline_x_coord<0:
textline_x_coord=0
if textline_y_coord<0:
textline_y_coord=0
points_co=points_co+str( textline_x_coord )
points_co=points_co+','
points_co=points_co+str( textline_y_coord )
else:
textline_x_coord=int( ( all_found_texline_polygons[mm][j][l][0][0]
+all_box_coord[mm][2]+page_coord[2])/self.scale_x )
textline_y_coord=int( ( all_found_texline_polygons[mm][j][l][0][1]
+all_box_coord[mm][0]+page_coord[0])/self.scale_y)
if textline_x_coord<0:
textline_x_coord=0
if textline_y_coord<0:
textline_y_coord=0
points_co=points_co+str( textline_x_coord )
points_co=points_co+','
points_co=points_co+str( textline_y_coord )
if (self.curved_line) and np.abs(slopes[mm]) <= 45 :
if len(all_found_texline_polygons[mm][j][l])==2:
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][0]
+page_coord[2])/self.scale_x) )
points_co=points_co+','
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][1]
+page_coord[0])/self.scale_y) )
else:
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][0]
+page_coord[2])/self.scale_x ) )
points_co=points_co+','
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][1]
+page_coord[0])/self.scale_y) )
elif (self.curved_line) and np.abs(slopes[mm]) > 45 :
if len(all_found_texline_polygons[mm][j][l])==2:
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][0]
+all_box_coord[mm][2]+page_coord[2])/self.scale_x) )
points_co=points_co+','
points_co=points_co+str( int( (all_found_texline_polygons[mm][j][l][1]
+all_box_coord[mm][0]+page_coord[0])/self.scale_y) )
else:
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][0]
+all_box_coord[mm][2]+page_coord[2])/self.scale_x ) )
points_co=points_co+','
points_co=points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][1]
+all_box_coord[mm][0]+page_coord[0])/self.scale_y) )
if l<(len(all_found_texline_polygons[mm][j])-1):
points_co=points_co+' '
#print(points_co)
coord.set('points',points_co)
texteqreg=ET.SubElement(textregion, 'TextEquiv') texteqreg=ET.SubElement(textregion, 'TextEquiv')
unireg=ET.SubElement(texteqreg, 'Unicode') unireg=ET.SubElement(texteqreg, 'Unicode')
@ -1590,102 +1577,8 @@ class eynollah:
# textregion.set('type','paragraph') # textregion.set('type','paragraph')
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
points_co=''
for lmm in range(len(found_polygons_text_region_h[mm])):
if len(found_polygons_text_region_h[mm][lmm])==2:
points_co=points_co+str( int( (found_polygons_text_region_h[mm][lmm][0] +page_coord[2])/self.scale_x ) )
points_co=points_co+','
points_co=points_co+str( int( (found_polygons_text_region_h[mm][lmm][1] +page_coord[0])/self.scale_y ) )
else:
points_co=points_co+str( int((found_polygons_text_region_h[mm][lmm][0][0] +page_coord[2])/self.scale_x) )
points_co=points_co+','
points_co=points_co+str( int((found_polygons_text_region_h[mm][lmm][0][1] +page_coord[0])/self.scale_y) )
if lmm<(len(found_polygons_text_region_h[mm])-1):
points_co=points_co+' '
#print(points_co)
coord_text.set('points',points_co)
for j in range(len(all_found_texline_polygons_h[mm])):
textline=ET.SubElement(textregion, 'TextLine')
textline.set('id','l'+str(id_indexer_l))
id_indexer_l+=1
coord = ET.SubElement(textline, 'Coords')
texteq=ET.SubElement(textline, 'TextEquiv')
uni=ET.SubElement(texteq, 'Unicode')
uni.text = ' '
#points = ET.SubElement(coord, 'Points')
points_co=''
for l in range(len(all_found_texline_polygons_h[mm][j])):
#point = ET.SubElement(coord, 'Point')
if not self.curved_line:
#point.set('x',str(found_polygons[j][l][0]))
#point.set('y',str(found_polygons[j][l][1]))
if len(all_found_texline_polygons_h[mm][j][l])==2:
textline_x_coord=int( (all_found_texline_polygons_h[mm][j][l][0]
+all_box_coord_h[mm][2]+page_coord[2])/self.scale_x)
textline_y_coord=int( (all_found_texline_polygons_h[mm][j][l][1]
+all_box_coord_h[mm][0]+page_coord[0])/self.scale_y)
if textline_x_coord<0:
textline_x_coord=0
if textline_y_coord<0:
textline_y_coord=0
points_co=points_co+str( textline_x_coord )
points_co=points_co+','
points_co=points_co+str( textline_y_coord )
else:
textline_x_coord=int( ( all_found_texline_polygons_h[mm][j][l][0][0]
+all_box_coord_h[mm][2]+page_coord[2])/self.scale_x )
textline_y_coord=int( ( all_found_texline_polygons_h[mm][j][l][0][1]
+all_box_coord_h[mm][0]+page_coord[0])/self.scale_y)
if textline_x_coord<0:
textline_x_coord=0
if textline_y_coord<0:
textline_y_coord=0
points_co=points_co+str( textline_x_coord )
points_co=points_co+','
points_co=points_co+str( textline_y_coord)
if self.curved_line:
if len(all_found_texline_polygons_h[mm][j][l])==2:
points_co=points_co+str( int( (all_found_texline_polygons_h[mm][j][l][0]
+page_coord[2])/self.scale_x) )
points_co=points_co+','
points_co=points_co+str( int( (all_found_texline_polygons_h[mm][j][l][1]
+page_coord[0])/self.scale_y) )
else:
points_co=points_co+str( int( ( all_found_texline_polygons_h[mm][j][l][0][0]
+page_coord[2])/self.scale_x ) )
points_co=points_co+','
points_co=points_co+str( int( ( all_found_texline_polygons_h[mm][j][l][0][1]
+page_coord[0])/self.scale_y) )
if l<(len(all_found_texline_polygons_h[mm][j])-1):
points_co=points_co+' '
#print(points_co)
coord.set('points',points_co)
self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord, slopes)
texteqreg=ET.SubElement(textregion, 'TextEquiv') texteqreg=ET.SubElement(textregion, 'TextEquiv')
unireg=ET.SubElement(texteqreg, 'Unicode') unireg=ET.SubElement(texteqreg, 'Unicode')

Loading…
Cancel
Save