Headers were not written correctly; fixed.

pull/19/head
vahidrezanezhad 4 years ago committed by GitHub
parent c2e9ebb366
commit 306e2a0a13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -33,6 +33,7 @@ from lxml import etree as ET
from matplotlib import pyplot, transforms
import matplotlib.patches as mpatches
import imutils
import matplotlib.pyplot as plt
from .utils.contour import (
contours_in_same_horizon,
@@ -454,9 +455,12 @@ class eynollah:
# Also set for the plotter
# XXX TODO hacky
self.plotter.image_org = self.image_org
self.plotter.scale_y = self.scale_y
self.plotter.scale_x = self.scale_x
#self.plotter.image_org = self.image_org
#self.plotter.scale_y = self.scale_y
#self.plotter.scale_x = self.scale_x
def get_image_and_scales_after_enhancing(self, img_org, img_res):
@@ -468,6 +472,7 @@ class eynollah:
self.scale_y = img_res.shape[0] / float(self.image_org.shape[0])
self.scale_x = img_res.shape[1] / float(self.image_org.shape[1])
del img_org
del img_res
@@ -1324,9 +1329,11 @@ class eynollah:
#else:
# textregion.set('type','paragraph')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord, slopes, id_indexer_l)
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l)
texteqreg=ET.SubElement(textregion, 'TextEquiv')
unireg=ET.SubElement(texteqreg, 'Unicode')
@@ -1459,9 +1466,14 @@ class eynollah:
except:
pass
print(dir_of_image)
print(self.f_name)
print(os.path.join(dir_of_image, self.f_name) + ".xml")
##print(dir_of_image)
##print(self.f_name)
##print(os.path.join(dir_of_image, self.f_name) + ".xml")
##tree = ET.ElementTree(pcgts)
##tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml")
print(self.image_filename_stem)
# print(os.path.join(dir_of_image, self.image_filename_stem) + ".xml")
tree = ET.ElementTree(pcgts)
tree.write(os.path.join(dir_of_image, self.image_filename_stem) + ".xml")
@@ -2134,7 +2146,9 @@ class eynollah:
img_org, img_res, is_image_enhanced = self.resize_image_with_column_classifier(is_image_enhanced)
self.get_image_and_scales_after_enhancing(img_org, img_res)
# print(self.scale_x)
print("enhancing: " + str(time.time() - t1))
text_regions_p_1 = self.get_regions_from_xy_2models(img_res, is_image_enhanced)
@@ -2377,8 +2391,8 @@ class eynollah:
text_regions_p[:, :][regions_fully[:, :, 0] == 4] = 4
text_regions_p[:, :][regions_fully_np[:, :, 0] == 4] = 4
# plt.imshow(text_regions_p)
# plt.show()
#plt.imshow(text_regions_p)
#plt.show()
if np.abs(slope_deskew) >= SLOPE_THRESHOLD:
image_page_rotated_n, textline_mask_tot_d, text_regions_p_1_n, regions_fully_n = rotation_not_90_func_full_layout(image_page, textline_mask_tot, text_regions_p, regions_fully, slope_deskew)
@@ -2541,6 +2555,8 @@ class eynollah:
else:
contours_only_text_parent_d_ordered = None
text_regions_p, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, slopes, slopes_h, contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered = check_any_text_region_in_model_one_is_main_or_header(text_regions_p, regions_fully, contours_only_text_parent, all_box_coord, all_found_texline_polygons, slopes, contours_only_text_parent_d_ordered)
if self.plotter:
self.plotter.save_plot_of_layout(text_regions_p, image_page)

Loading…
Cancel
Save