eynollah: define self.cont_page in __init__, extract page coord calculation

pull/19/head
Konstantin Baierer 4 years ago
parent 52df6972ad
commit 4a5c99008a

@ -139,6 +139,7 @@ class eynollah:
headers_off=False headers_off=False
): ):
self.image_filename = image_filename # XXX This does not seem to be a directory as the name suggests, but a file self.image_filename = image_filename # XXX This does not seem to be a directory as the name suggests, but a file
self.cont_page = []
self.dir_out = dir_out self.dir_out = dir_out
self.image_filename_stem = image_filename_stem self.image_filename_stem = image_filename_stem
self.dir_of_cropped_images = dir_of_cropped_images self.dir_of_cropped_images = dir_of_cropped_images
@ -821,7 +822,6 @@ class eynollah:
croped_page, page_coord = crop_image_inside_box(box, self.image) croped_page, page_coord = crop_image_inside_box(box, self.image)
self.cont_page = []
self.cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]])) self.cont_page.append(np.array([[page_coord[2], page_coord[0]], [page_coord[3], page_coord[0]], [page_coord[3], page_coord[1]], [page_coord[2], page_coord[1]]]))
session_page.close() session_page.close()
@ -1414,24 +1414,7 @@ class eynollah:
page_print_sub = ET.SubElement(page, "PrintSpace") page_print_sub = ET.SubElement(page, "PrintSpace")
coord_page = ET.SubElement(page_print_sub, "Coords") coord_page = ET.SubElement(page_print_sub, "Coords")
points_page_print = "" coord_page.set('points', self.calculate_page_coords())
for lmm in range(len(self.cont_page[0])):
if len(self.cont_page[0][lmm])==2:
points_page_print=points_page_print+str( int( (self.cont_page[0][lmm][0])/self.scale_x ) )
points_page_print=points_page_print+','
points_page_print=points_page_print+str( int( (self.cont_page[0][lmm][1])/self.scale_y ) )
else:
points_page_print=points_page_print+str( int((self.cont_page[0][lmm][0][0])/self.scale_x) )
points_page_print=points_page_print+','
points_page_print=points_page_print+str( int((self.cont_page[0][lmm][0][1])/self.scale_y) )
if lmm<(len(self.cont_page[0])-1):
points_page_print=points_page_print+' '
coord_page.set('points',points_page_print)
if len(contours)>0: if len(contours)>0:
region_order=ET.SubElement(page, 'ReadingOrder') region_order=ET.SubElement(page, 'ReadingOrder')
@ -1928,36 +1911,36 @@ class eynollah:
tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml") tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml")
def write_into_page_xml(self, contours, page_coord, dir_of_image, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, curved_line, slopes, slopes_marginals): def calculate_page_coords(self):
found_polygons_text_region = contours
##found_polygons_text_region_h=contours_h
# create the file structure
pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
page_print_sub = ET.SubElement(page, "PrintSpace")
coord_page = ET.SubElement(page_print_sub, "Coords")
points_page_print = "" points_page_print = ""
for lmm in range(len(self.cont_page[0])): for lmm in range(len(self.cont_page[0])):
if len(self.cont_page[0][lmm]) == 2: if len(self.cont_page[0][lmm]) == 2:
points_page_print = points_page_print + str( int( ( self.cont_page[0][lmm][0] ) / self.scale_x ) ) points_page_print = points_page_print + str(int((self.cont_page[0][lmm][0] ) / self.scale_x))
points_page_print = points_page_print + ',' points_page_print = points_page_print + ','
points_page_print = points_page_print + str( int( ( self.cont_page[0][lmm][1] ) / self.scale_y ) ) points_page_print = points_page_print + str(int((self.cont_page[0][lmm][1] ) / self.scale_y))
else: else:
points_page_print = points_page_print + str( int( ( self.cont_page[0][lmm][0][0]) / self.scale_x ) ) points_page_print = points_page_print + str(int((self.cont_page[0][lmm][0][0]) / self.scale_x))
points_page_print=points_page_print + ',' points_page_print = points_page_print + ','
points_page_print=points_page_print + str( int( ( self.cont_page[0][lmm][0][1] ) / self.scale_y) ) points_page_print = points_page_print + str(int((self.cont_page[0][lmm][0][1] ) / self.scale_y))
if lmm < (len( self.cont_page[0] ) - 1): if lmm < (len( self.cont_page[0] ) - 1):
points_page_print = points_page_print + ' ' points_page_print = points_page_print + ' '
coord_page.set( 'points', points_page_print ) return points_page_print
def write_into_page_xml(self, contours, page_coord, dir_of_image, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, curved_line, slopes, slopes_marginals):
found_polygons_text_region = contours
##found_polygons_text_region_h=contours_h
# create the file structure
pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
page_print_sub = ET.SubElement(page, "PrintSpace")
coord_page = ET.SubElement(page_print_sub, "Coords")
coord_page.set('points', self.calculate_page_coords())
if len(contours) > 0: if len(contours) > 0:
region_order = ET.SubElement(page, 'ReadingOrder') region_order = ET.SubElement(page, 'ReadingOrder')
region_order_sub = ET.SubElement(region_order, 'OrderedGroup') region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
region_order_sub.set('id',"ro357564684568544579089") region_order_sub.set('id',"ro357564684568544579089")
indexer_region=0 indexer_region=0

Loading…
Cancel
Save