|
|
@ -1506,13 +1506,13 @@ class eynollah:
|
|
|
|
points_page_print = ""
|
|
|
|
points_page_print = ""
|
|
|
|
for lmm in range(len(self.cont_page[0])):
|
|
|
|
for lmm in range(len(self.cont_page[0])):
|
|
|
|
if len(self.cont_page[0][lmm]) == 2:
|
|
|
|
if len(self.cont_page[0][lmm]) == 2:
|
|
|
|
points_page_print = points_page_print + str(int((self.cont_page[0][lmm][0] ) / self.scale_x))
|
|
|
|
points_page_print += str(int((self.cont_page[0][lmm][0] ) / self.scale_x))
|
|
|
|
points_page_print = points_page_print + ','
|
|
|
|
points_page_print += ','
|
|
|
|
points_page_print = points_page_print + str(int((self.cont_page[0][lmm][1] ) / self.scale_y))
|
|
|
|
points_page_print += str(int((self.cont_page[0][lmm][1] ) / self.scale_y))
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
points_page_print = points_page_print + str(int((self.cont_page[0][lmm][0][0]) / self.scale_x))
|
|
|
|
points_page_print += str(int((self.cont_page[0][lmm][0][0]) / self.scale_x))
|
|
|
|
points_page_print = points_page_print + ','
|
|
|
|
points_page_print += ','
|
|
|
|
points_page_print = points_page_print + str(int((self.cont_page[0][lmm][0][1] ) / self.scale_y))
|
|
|
|
points_page_print += str(int((self.cont_page[0][lmm][0][1] ) / self.scale_y))
|
|
|
|
|
|
|
|
|
|
|
|
if lmm < (len( self.cont_page[0] ) - 1):
|
|
|
|
if lmm < (len( self.cont_page[0] ) - 1):
|
|
|
|
points_page_print = points_page_print + ' '
|
|
|
|
points_page_print = points_page_print + ' '
|
|
|
@ -1564,20 +1564,11 @@ class eynollah:
|
|
|
|
|
|
|
|
|
|
|
|
for mm in range(len(found_polygons_text_region)):
|
|
|
|
for mm in range(len(found_polygons_text_region)):
|
|
|
|
textregion=ET.SubElement(page, 'TextRegion')
|
|
|
|
textregion=ET.SubElement(page, 'TextRegion')
|
|
|
|
|
|
|
|
|
|
|
|
textregion.set('id', 'r'+str(id_indexer))
|
|
|
|
textregion.set('id', 'r'+str(id_indexer))
|
|
|
|
id_indexer += 1
|
|
|
|
id_indexer += 1
|
|
|
|
|
|
|
|
|
|
|
|
textregion.set('type', 'paragraph')
|
|
|
|
textregion.set('type', 'paragraph')
|
|
|
|
#if mm==0:
|
|
|
|
|
|
|
|
# textregion.set('type','header')
|
|
|
|
|
|
|
|
#else:
|
|
|
|
|
|
|
|
# textregion.set('type','paragraph')
|
|
|
|
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text = ET.SubElement(textregion, 'Coords')
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
|
|
|
|
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for j in range(len(all_found_texline_polygons[mm])):
|
|
|
|
for j in range(len(all_found_texline_polygons[mm])):
|
|
|
|
textline=ET.SubElement(textregion, 'TextLine')
|
|
|
|
textline=ET.SubElement(textregion, 'TextLine')
|
|
|
|
textline.set('id', 'l' + str(id_indexer_l))
|
|
|
|
textline.set('id', 'l' + str(id_indexer_l))
|
|
|
@ -1586,13 +1577,10 @@ class eynollah:
|
|
|
|
texteq=ET.SubElement(textline, 'TextEquiv')
|
|
|
|
texteq=ET.SubElement(textline, 'TextEquiv')
|
|
|
|
uni=ET.SubElement(texteq, 'Unicode')
|
|
|
|
uni=ET.SubElement(texteq, 'Unicode')
|
|
|
|
uni.text = ' '
|
|
|
|
uni.text = ' '
|
|
|
|
#points = ET.SubElement(coord, 'Points')
|
|
|
|
|
|
|
|
points_co=''
|
|
|
|
points_co=''
|
|
|
|
for l in range(len(all_found_texline_polygons[mm][j])):
|
|
|
|
for l in range(len(all_found_texline_polygons[mm][j])):
|
|
|
|
#point = ET.SubElement(coord, 'Point')
|
|
|
|
#point = ET.SubElement(coord, 'Point')
|
|
|
|
if not curved_line:
|
|
|
|
if not curved_line:
|
|
|
|
#point.set('x',str(found_polygons[j][l][0]))
|
|
|
|
|
|
|
|
#point.set('y',str(found_polygons[j][l][1]))
|
|
|
|
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
|
textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
|
textline_y_coord = max(0, int((all_found_texline_polygons[mm][j][l][1] + all_box_coord[mm][0] + page_coord[0]) / self.scale_y))
|
|
|
|
textline_y_coord = max(0, int((all_found_texline_polygons[mm][j][l][1] + all_box_coord[mm][0] + page_coord[0]) / self.scale_y))
|
|
|
@ -1601,38 +1589,27 @@ class eynollah:
|
|
|
|
textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0][0] + all_box_coord[mm][2]+page_coord[2]) / self.scale_x))
|
|
|
|
textline_x_coord = max(0, int((all_found_texline_polygons[mm][j][l][0][0] + all_box_coord[mm][2]+page_coord[2]) / self.scale_x))
|
|
|
|
textline_y_coord = max(0, int((all_found_texline_polygons[mm][j][l][0][1] + all_box_coord[mm][0]+page_coord[0]) / self.scale_y))
|
|
|
|
textline_y_coord = max(0, int((all_found_texline_polygons[mm][j][l][0][1] + all_box_coord[mm][0]+page_coord[0]) / self.scale_y))
|
|
|
|
points_co += str(textline_x_coord) + ',' + str(textline_y_coord)
|
|
|
|
points_co += str(textline_x_coord) + ',' + str(textline_y_coord)
|
|
|
|
|
|
|
|
if curved_line and abs(slopes[mm]) <= 45:
|
|
|
|
if (self.curved_line) and abs(slopes[mm]) <= 45:
|
|
|
|
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
points_co=points_co + str( int( (all_found_texline_polygons[mm][j][l][0]
|
|
|
|
points_co += str(int((all_found_texline_polygons[mm][j][l][0] + page_coord[2]) / self.scale_x))
|
|
|
|
+ page_coord[2]) / self.scale_x) )
|
|
|
|
points_co += ','
|
|
|
|
points_co = points_co + ','
|
|
|
|
points_co += str(int((all_found_texline_polygons[mm][j][l][1] + page_coord[0]) / self.scale_y))
|
|
|
|
points_co = points_co + str( int( (all_found_texline_polygons[mm][j][l][1]
|
|
|
|
|
|
|
|
+ page_coord[0]) / self.scale_y) )
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
points_co = points_co + str( int( ( all_found_texline_polygons[mm][j][l][0][0]
|
|
|
|
points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0][0] + page_coord[2]) / self.scale_x))
|
|
|
|
+ page_coord[2]) / self.scale_x ) )
|
|
|
|
|
|
|
|
points_co = points_co + ','
|
|
|
|
points_co = points_co + ','
|
|
|
|
points_co = points_co + str( int( ( all_found_texline_polygons[mm][j][l][0][1]
|
|
|
|
points_co = points_co + str(int((all_found_texline_polygons[mm][j][l][0][1] + page_coord[0]) / self.scale_y))
|
|
|
|
+ page_coord[0]) / self.scale_y) )
|
|
|
|
elif curved_line and abs(slopes[mm]) > 45:
|
|
|
|
|
|
|
|
|
|
|
|
elif (self.curved_line) and abs(slopes[mm]) > 45:
|
|
|
|
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
if len(all_found_texline_polygons[mm][j][l]) == 2:
|
|
|
|
points_co = points_co + str( int( (all_found_texline_polygons[mm][j][l][0]
|
|
|
|
points_co += str(int((all_found_texline_polygons[mm][j][l][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
|
+ all_box_coord[mm][2] + page_coord[2]) / self.scale_x) )
|
|
|
|
points_co += ','
|
|
|
|
points_co = points_co + ','
|
|
|
|
points_co += str(int((all_found_texline_polygons[mm][j][l][1] + all_box_coord[mm][0] + page_coord[0]) / self.scale_y))
|
|
|
|
points_co = points_co + str( int( (all_found_texline_polygons[mm][j][l][1]
|
|
|
|
|
|
|
|
+ all_box_coord[mm][0] + page_coord[0]) / self.scale_y) )
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
points_co = points_co + str( int( ( all_found_texline_polygons[mm][j][l][0][0]
|
|
|
|
points_co += str(int((all_found_texline_polygons[mm][j][l][0][0] + all_box_coord[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
|
+ all_box_coord[mm][2] + page_coord[2]) / self.scale_x ) )
|
|
|
|
points_co += ','
|
|
|
|
points_co = points_co + ','
|
|
|
|
points_co += str(int((all_found_texline_polygons[mm][j][l][0][1] + all_box_coord[mm][0] + page_coord[0]) / self.scale_y))
|
|
|
|
points_co = points_co+str( int( ( all_found_texline_polygons[mm][j][l][0][1]
|
|
|
|
|
|
|
|
+ all_box_coord[mm][0] + page_coord[0]) / self.scale_y) )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if l < (len(all_found_texline_polygons[mm][j]) - 1):
|
|
|
|
if l < len(all_found_texline_polygons[mm][j]) - 1:
|
|
|
|
points_co = points_co + ' '
|
|
|
|
points_co += ' '
|
|
|
|
#print(points_co)
|
|
|
|
|
|
|
|
coord.set('points', points_co)
|
|
|
|
coord.set('points', points_co)
|
|
|
|
|
|
|
|
|
|
|
|
texteqreg = ET.SubElement(textregion, 'TextEquiv')
|
|
|
|
texteqreg = ET.SubElement(textregion, 'TextEquiv')
|
|
|
@ -1663,33 +1640,24 @@ class eynollah:
|
|
|
|
for l in range(len(all_found_texline_polygons_marginals[mm][j])):
|
|
|
|
for l in range(len(all_found_texline_polygons_marginals[mm][j])):
|
|
|
|
if not curved_line:
|
|
|
|
if not curved_line:
|
|
|
|
if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
|
|
|
|
if len(all_found_texline_polygons_marginals[mm][j][l]) == 2:
|
|
|
|
points_co=points_co+str( int( (all_found_texline_polygons_marginals[mm][j][l][0]
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
|
+all_box_coord_marginals[mm][2]+page_coord[2])/self.scale_x) )
|
|
|
|
points_co += ','
|
|
|
|
points_co=points_co+','
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + all_box_coord_marginals[mm][0] + page_coord[0]) / self.scale_y))
|
|
|
|
points_co=points_co+str( int( (all_found_texline_polygons_marginals[mm][j][l][1]
|
|
|
|
|
|
|
|
+all_box_coord_marginals[mm][0]+page_coord[0])/self.scale_y) )
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
points_co=points_co+str( int( ( all_found_texline_polygons_marginals[mm][j][l][0][0]
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + all_box_coord_marginals[mm][2] + page_coord[2]) / self.scale_x))
|
|
|
|
+all_box_coord_marginals[mm][2]+page_coord[2])/self.scale_x ) )
|
|
|
|
points_co += ','
|
|
|
|
points_co=points_co+','
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + all_box_coord_marginals[mm][0] + page_coord[0])/self.scale_y))
|
|
|
|
points_co=points_co+str( int( ( all_found_texline_polygons_marginals[mm][j][l][0][1]
|
|
|
|
|
|
|
|
+all_box_coord_marginals[mm][0]+page_coord[0])/self.scale_y) )
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
if len(all_found_texline_polygons_marginals[mm][j][l])==2:
|
|
|
|
if len(all_found_texline_polygons_marginals[mm][j][l])==2:
|
|
|
|
points_co=points_co+str( int( (all_found_texline_polygons_marginals[mm][j][l][0]
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0] + page_coord[2]) / self.scale_x))
|
|
|
|
+page_coord[2])/self.scale_x) )
|
|
|
|
points_co += ','
|
|
|
|
points_co=points_co+','
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][1] + page_coord[0]) / self.scale_y))
|
|
|
|
points_co=points_co+str( int( (all_found_texline_polygons_marginals[mm][j][l][1]
|
|
|
|
|
|
|
|
+page_coord[0])/self.scale_y) )
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
points_co=points_co+str( int( ( all_found_texline_polygons_marginals[mm][j][l][0][0]
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][0] + page_coord[2]) / self.scale_x))
|
|
|
|
+page_coord[2])/self.scale_x ) )
|
|
|
|
points_co += ','
|
|
|
|
points_co=points_co+','
|
|
|
|
points_co += str(int((all_found_texline_polygons_marginals[mm][j][l][0][1] + page_coord[0]) / self.scale_y))
|
|
|
|
points_co=points_co+str( int( ( all_found_texline_polygons_marginals[mm][j][l][0][1]
|
|
|
|
if l < len(all_found_texline_polygons_marginals[mm][j]) - 1:
|
|
|
|
+page_coord[0])/self.scale_y) )
|
|
|
|
points_co += ' '
|
|
|
|
if l<(len(all_found_texline_polygons_marginals[mm][j])-1):
|
|
|
|
|
|
|
|
points_co=points_co+' '
|
|
|
|
|
|
|
|
#print(points_co)
|
|
|
|
|
|
|
|
coord.set('points',points_co)
|
|
|
|
coord.set('points',points_co)
|
|
|
|
except:
|
|
|
|
except:
|
|
|
|
pass
|
|
|
|
pass
|
|
|
@ -1709,9 +1677,8 @@ class eynollah:
|
|
|
|
points_co=points_co+str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
|
|
|
|
points_co=points_co+str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
|
|
|
|
points_co=points_co+','
|
|
|
|
points_co=points_co+','
|
|
|
|
points_co=points_co+str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
|
|
|
|
points_co=points_co+str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
|
|
|
|
|
|
|
|
if lmm < len(found_polygons_text_region_img[mm]) - 1:
|
|
|
|
if lmm<(len(found_polygons_text_region_img[mm])-1):
|
|
|
|
points_co += ' '
|
|
|
|
points_co=points_co+' '
|
|
|
|
|
|
|
|
coord_text.set('points', points_co)
|
|
|
|
coord_text.set('points', points_co)
|
|
|
|
except:
|
|
|
|
except:
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|