mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-08 19:59:56 +02:00
all options are enabled for light version
This commit is contained in:
parent
3871e22c35
commit
3bbbeecfec
2 changed files with 31 additions and 16 deletions
|
@ -119,6 +119,12 @@ class Eynollah:
|
||||||
self.image_filename = image_filename
|
self.image_filename = image_filename
|
||||||
self.dir_out = dir_out
|
self.dir_out = dir_out
|
||||||
self.dir_in = dir_in
|
self.dir_in = dir_in
|
||||||
|
self.dir_of_all = dir_of_all
|
||||||
|
self.dir_of_deskewed = dir_of_deskewed
|
||||||
|
self.dir_of_deskewed = dir_of_deskewed
|
||||||
|
self.dir_of_cropped_images=dir_of_cropped_images
|
||||||
|
self.dir_of_layout=dir_of_layout
|
||||||
|
self.enable_plotting = enable_plotting
|
||||||
self.allow_enhancement = allow_enhancement
|
self.allow_enhancement = allow_enhancement
|
||||||
self.curved_line = curved_line
|
self.curved_line = curved_line
|
||||||
self.full_layout = full_layout
|
self.full_layout = full_layout
|
||||||
|
@ -128,14 +134,14 @@ class Eynollah:
|
||||||
self.headers_off = headers_off
|
self.headers_off = headers_off
|
||||||
self.light_version = light_version
|
self.light_version = light_version
|
||||||
self.pcgts = pcgts
|
self.pcgts = pcgts
|
||||||
self.plotter = None if not enable_plotting else EynollahPlotter(
|
|
||||||
dir_out=self.dir_out,
|
|
||||||
dir_of_all=dir_of_all,
|
|
||||||
dir_of_deskewed=dir_of_deskewed,
|
|
||||||
dir_of_cropped_images=dir_of_cropped_images,
|
|
||||||
dir_of_layout=dir_of_layout,
|
|
||||||
image_filename_stem=Path(Path(image_filename).name).stem)
|
|
||||||
if not dir_in:
|
if not dir_in:
|
||||||
|
self.plotter = None if not enable_plotting else EynollahPlotter(
|
||||||
|
dir_out=self.dir_out,
|
||||||
|
dir_of_all=dir_of_all,
|
||||||
|
dir_of_deskewed=dir_of_deskewed,
|
||||||
|
dir_of_cropped_images=dir_of_cropped_images,
|
||||||
|
dir_of_layout=dir_of_layout,
|
||||||
|
image_filename_stem=Path(Path(image_filename).name).stem)
|
||||||
self.writer = EynollahXmlWriter(
|
self.writer = EynollahXmlWriter(
|
||||||
dir_out=self.dir_out,
|
dir_out=self.dir_out,
|
||||||
image_filename=self.image_filename,
|
image_filename=self.image_filename,
|
||||||
|
@ -208,6 +214,14 @@ class Eynollah:
|
||||||
self._imgs = self._cache_images(image_filename=image_filename)
|
self._imgs = self._cache_images(image_filename=image_filename)
|
||||||
self.image_filename = image_filename
|
self.image_filename = image_filename
|
||||||
|
|
||||||
|
self.plotter = None if not self.enable_plotting else EynollahPlotter(
|
||||||
|
dir_out=self.dir_out,
|
||||||
|
dir_of_all=self.dir_of_all,
|
||||||
|
dir_of_deskewed=self.dir_of_deskewed,
|
||||||
|
dir_of_cropped_images=self.dir_of_cropped_images,
|
||||||
|
dir_of_layout=self.dir_of_layout,
|
||||||
|
image_filename_stem=Path(Path(image_filename).name).stem)
|
||||||
|
|
||||||
self.writer = EynollahXmlWriter(
|
self.writer = EynollahXmlWriter(
|
||||||
dir_out=self.dir_out,
|
dir_out=self.dir_out,
|
||||||
image_filename=self.image_filename,
|
image_filename=self.image_filename,
|
||||||
|
@ -1396,7 +1410,7 @@ class Eynollah:
|
||||||
|
|
||||||
queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess])
|
queue_of_all_params.put([textlines_rectangles_per_each_subprocess, bounding_box_of_textregion_per_each_subprocess, contours_textregion_per_each_subprocess, contours_textregion_par_per_each_subprocess, all_box_coord_per_process, index_by_text_region_contours, slopes_per_each_subprocess])
|
||||||
def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew):
|
def do_work_of_slopes_new_light(self, queue_of_all_params, boxes_text, textline_mask_tot_ea, contours_per_process, contours_par_per_process, indexes_r_con_per_pro, image_page_rotated, slope_deskew):
|
||||||
self.logger.debug('enter do_work_of_slopes_new')
|
self.logger.debug('enter do_work_of_slopes_new_light')
|
||||||
slopes_per_each_subprocess = []
|
slopes_per_each_subprocess = []
|
||||||
bounding_box_of_textregion_per_each_subprocess = []
|
bounding_box_of_textregion_per_each_subprocess = []
|
||||||
textlines_rectangles_per_each_subprocess = []
|
textlines_rectangles_per_each_subprocess = []
|
||||||
|
@ -1566,8 +1580,8 @@ class Eynollah:
|
||||||
q.put(slopes_sub)
|
q.put(slopes_sub)
|
||||||
poly.put(poly_sub)
|
poly.put(poly_sub)
|
||||||
box_sub.put(boxes_sub_new)
|
box_sub.put(boxes_sub_new)
|
||||||
def get_regions_from_xy_2models_light(self,img,is_image_enhanced, num_col_classifier):
|
def get_regions_light_v(self,img,is_image_enhanced, num_col_classifier):
|
||||||
self.logger.debug("enter get_regions_from_xy_2models")
|
self.logger.debug("enter get_regions_light_v")
|
||||||
erosion_hurts = False
|
erosion_hurts = False
|
||||||
img_org = np.copy(img)
|
img_org = np.copy(img)
|
||||||
img_height_h = img_org.shape[0]
|
img_height_h = img_org.shape[0]
|
||||||
|
@ -2929,7 +2943,7 @@ class Eynollah:
|
||||||
|
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if self.light_version:
|
if self.light_version:
|
||||||
text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_from_xy_2models_light(img_res, is_image_enhanced, num_col_classifier)
|
text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea = self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
|
||||||
slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
|
slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
|
||||||
#self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
|
#self.logger.info("Textregion detection took %.1fs ", time.time() - t1t)
|
||||||
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \
|
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, text_regions_p_1, cont_page, table_prediction, textline_mask_tot_ea = \
|
||||||
|
@ -3179,4 +3193,5 @@ class Eynollah:
|
||||||
##return pcgts
|
##return pcgts
|
||||||
self.writer.write_pagexml(pcgts)
|
self.writer.write_pagexml(pcgts)
|
||||||
#self.logger.info("Job done in %.1fs", time.time() - t0)
|
#self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||||
self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
|
if self.dir_in:
|
||||||
|
self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
|
||||||
|
|
|
@ -74,8 +74,8 @@ class EynollahPlotter():
|
||||||
if self.dir_of_layout is not None:
|
if self.dir_of_layout is not None:
|
||||||
values = np.unique(text_regions_p[:, :])
|
values = np.unique(text_regions_p[:, :])
|
||||||
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
|
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
|
||||||
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
|
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator", "Tables"]
|
||||||
values_indexes = [0, 1, 2, 8, 4, 5, 6]
|
values_indexes = [0, 1, 2, 8, 4, 5, 6, 10]
|
||||||
plt.figure(figsize=(40, 40))
|
plt.figure(figsize=(40, 40))
|
||||||
plt.rcParams["font.size"] = "40"
|
plt.rcParams["font.size"] = "40"
|
||||||
im = plt.imshow(text_regions_p[:, :])
|
im = plt.imshow(text_regions_p[:, :])
|
||||||
|
@ -88,8 +88,8 @@ class EynollahPlotter():
|
||||||
if self.dir_of_all is not None:
|
if self.dir_of_all is not None:
|
||||||
values = np.unique(text_regions_p[:, :])
|
values = np.unique(text_regions_p[:, :])
|
||||||
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
|
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
|
||||||
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
|
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator", "Tables"]
|
||||||
values_indexes = [0, 1, 2, 8, 4, 5, 6]
|
values_indexes = [0, 1, 2, 8, 4, 5, 6, 10]
|
||||||
plt.figure(figsize=(80, 40))
|
plt.figure(figsize=(80, 40))
|
||||||
plt.rcParams["font.size"] = "40"
|
plt.rcParams["font.size"] = "40"
|
||||||
plt.subplot(1, 2, 1)
|
plt.subplot(1, 2, 1)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue