@ -7,6 +7,7 @@ document layout analysis (segmentation) with output in PAGE-XML
"""
from logging import Logger
from dataclasses import dataclass
import math
from os import listdir
from os . path import join
@ -21,6 +22,8 @@ import numpy as np
from scipy . signal import find_peaks
from scipy . ndimage import gaussian_filter1d
from . utils . dirs import EynollahDirs
from . utils . tf import (
PatchEncoder ,
Patches ,
@ -85,17 +88,10 @@ class Eynollah():
def __init__ (
self ,
dir _models : str ,
dir s : EynollahDirs ,
logger : Logger ,
image_filename : Optional [ str ] = None ,
image_pil : Optional [ Image ] = None ,
dir_out : Optional [ str ] = None ,
dir_in : Optional [ str ] = None ,
dir_of_cropped_images : Optional [ str ] = None ,
dir_of_layout : Optional [ str ] = None ,
dir_of_deskewed : Optional [ str ] = None ,
dir_of_all : Optional [ str ] = None ,
dir_save_page : Optional [ str ] = None ,
image_filename : Optional [ str ] = None ,
enable_plotting : bool = False ,
allow_enhancement : bool = False ,
curved_line : bool = False ,
@ -111,7 +107,9 @@ class Eynollah():
override_dpi : Optional [ int ] = None ,
pcgts : Optional [ OcrdPage ] = None ,
) :
if not dir_in :
self . dirs = dirs
self . logger = logger
if not dirs . dir_in :
if image_pil :
self . _imgs = self . _cache_images ( image_pil = image_pil )
else :
@ -119,14 +117,6 @@ class Eynollah():
if override_dpi :
self . dpi = override_dpi
self . image_filename = image_filename
self . dir_out = dir_out
self . dir_in = dir_in
self . dir_of_all = dir_of_all
self . dir_save_page = dir_save_page
self . dir_of_deskewed = dir_of_deskewed
self . dir_of_deskewed = dir_of_deskewed
self . dir_of_cropped_images = dir_of_cropped_images
self . dir_of_layout = dir_of_layout
self . enable_plotting = enable_plotting
self . allow_enhancement = allow_enhancement
self . curved_line = curved_line
@ -140,43 +130,37 @@ class Eynollah():
self . light_version = light_version
self . ignore_page_extraction = ignore_page_extraction
self . pcgts = pcgts
if not dir_in :
self . plotter = None if not enable_plotting else EynollahPlotter (
dir_out = self . dir_out ,
dir_of_all = dir_of_all ,
dir_save_page = dir_save_page ,
dir_of_deskewed = dir_of_deskewed ,
dir_of_cropped_images = dir_of_cropped_images ,
dir_of_layout = dir_of_layout ,
image_filename_stem = Path ( Path ( image_filename ) . name ) . stem )
# self.batch_mode = bool(self.dirs.dir_in)
if not dirs . dir_in :
assert self . image_filename
self . plotter = None if not self . enable_plotting else EynollahPlotter ( self . dirs , image_filename_stem = Path ( Path ( image_filename ) . name ) . stem )
self . writer = EynollahXmlWriter (
dir_out = self . dir _out,
dir_out = self . dirs . dir_out ,
image_filename = self . image_filename ,
curved_line = self . curved_line ,
textline_light = self . textline_light ,
pcgts = pcgts )
self . logger = logger
self . dir_models = dir_models
self . model_dir_of_enhancement = dir_models + " /eynollah-enhancement_20210425 "
self . model_dir_of_binarization = dir_models + " /eynollah-binarization_20210425 "
self . model_dir_of_col_classifier = dir_models + " /eynollah-column-classifier_20210425 "
self . model_region_dir_p = dir_models + " /eynollah-main-regions-aug-scaling_20210425 "
self . model_region_dir_p2 = dir_models + " /eynollah-main-regions-aug-rotation_20210425 "
self . model_region_dir_fully_np = dir_models + " /eynollah-full-regions-1column_20210425 "
self . model_region_dir_fully = dir_models + " /eynollah-full-regions-3+column_20210425 "
self . model_page_dir = dir_models + " /eynollah-page-extraction_20210425 "
self . model_region_dir_p_ens = dir_models + " /eynollah-main-regions-ensembled_20210425 "
self . model_region_dir_p_ens_light = dir_models + " /eynollah-main-regions_20220314 "
self . model_dir_of_enhancement = dirs . dir_models + " /eynollah-enhancement_20210425 "
self . model_dir_of_binarization = dirs . dir_models + " /eynollah-binarization_20210425 "
self . model_dir_of_col_classifier = dirs . dir_models + " /eynollah-column-classifier_20210425 "
self . model_region_dir_p = dirs . dir_models + " /eynollah-main-regions-aug-scaling_20210425 "
self . model_region_dir_p2 = dirs . dir_models + " /eynollah-main-regions-aug-rotation_20210425 "
self . model_region_dir_fully_np = dirs . dir_models + " /eynollah-full-regions-1column_20210425 "
self . model_region_dir_fully = dirs . dir_models + " /eynollah-full-regions-3+column_20210425 "
self . model_page_dir = dirs . dir_models + " /eynollah-page-extraction_20210425 "
self . model_region_dir_p_ens = dirs . dir_models + " /eynollah-main-regions-ensembled_20210425 "
self . model_region_dir_p_ens_light = dirs . dir_models + " /eynollah-main-regions_20220314 "
if self . textline_light :
self . model_textline_dir = dir _models + " /eynollah-textline_light_20210425 "
self . model_textline_dir = dir s. dir _models + " /eynollah-textline_light_20210425 "
else :
self . model_textline_dir = dir _models + " /eynollah-textline_20210425 "
self . model_tables = dir _models + " /eynollah-tables_20210319 "
self . model_textline_dir = dir s. dir _models + " /eynollah-textline_20210425 "
self . model_tables = dir s. dir _models + " /eynollah-tables_20210319 "
self . models : dict [ str , tf . keras . Model ] = { }
if dir_in and light_version :
if self . dirs . dir_in and light_version :
config = tf . compat . v1 . ConfigProto ( )
config . gpu_options . allow_growth = True
session = tf . compat . v1 . Session ( config = config )
@ -190,9 +174,9 @@ class Eynollah():
self . model_region_fl_np = self . our_load_model ( self . model_region_dir_fully_np )
self . model_region_fl = self . our_load_model ( self . model_region_dir_fully )
self . ls_imgs = listdir ( self . dir _in)
self . ls_imgs = listdir ( self . dir s. dir _in)
if dir_in and not light_version :
if self . dirs . dir_in and not light_version :
config = tf . compat . v1 . ConfigProto ( )
config . gpu_options . allow_growth = True
session = tf . compat . v1 . Session ( config = config )
@ -208,7 +192,7 @@ class Eynollah():
self . model_region_fl = self . our_load_model ( self . model_region_dir_fully )
self . model_enhancement = self . our_load_model ( self . model_dir_of_enhancement )
self . ls_imgs = listdir ( self . dir _in)
self . ls_imgs = listdir ( self . dir s. dir _in)
def _cache_images ( self , image_filename = None , image_pil = None ) :
@ -228,21 +212,14 @@ class Eynollah():
self . _imgs = self . _cache_images ( image_filename = image_filename )
self . image_filename = image_filename
self . plotter = None if not self . enable_plotting else EynollahPlotter (
dir_out = self . dir_out ,
dir_of_all = self . dir_of_all ,
dir_save_page = self . dir_save_page ,
dir_of_deskewed = self . dir_of_deskewed ,
dir_of_cropped_images = self . dir_of_cropped_images ,
dir_of_layout = self . dir_of_layout ,
image_filename_stem = Path ( Path ( image_filename ) . name ) . stem )
self . plotter = None if not self . enable_plotting else EynollahPlotter ( self . dirs , image_filename_stem = Path ( Path ( image_filename ) . name ) . stem )
self . writer = EynollahXmlWriter (
dir_out = self . dir _out,
dir_out = self . dirs . dir_out ,
image_filename = self . image_filename ,
curved_line = self . curved_line ,
textline_light = self . textline_light ,
pcgts = self . pcgts )
def imread ( self , grayscale = False , uint8 = True ) :
key = ' img '
if grayscale :
@ -415,7 +392,7 @@ class Eynollah():
img = self . imread ( )
_ , page_coord = self . early_page_for_num_of_column_classification ( img )
if not self . dir _in:
if not self . dir s. dir _in:
model_num_classifier = self . load_model ( self . model_dir_of_col_classifier )
if self . input_binary :
img_in = np . copy ( img )
@ -439,7 +416,7 @@ class Eynollah():
img_in [ 0 , : , : , 1 ] = img_1ch [ : , : ]
img_in [ 0 , : , : , 2 ] = img_1ch [ : , : ]
if not self . dir _in:
if not self . dir s. dir _in:
label_p_pred = model_num_classifier . predict ( img_in , verbose = 0 )
else :
label_p_pred = self . model_classifier . predict ( img_in , verbose = 0 )
@ -462,7 +439,7 @@ class Eynollah():
self . logger . info ( " Detected %s DPI " , dpi )
if self . input_binary :
img = self . imread ( )
if self . dir _in:
if self . dir s. dir _in:
prediction_bin = self . do_prediction ( True , img , self . model_bin )
else :
@ -484,7 +461,7 @@ class Eynollah():
t1 = time . time ( )
_ , page_coord = self . early_page_for_num_of_column_classification ( img_bin )
if not self . dir _in:
if not self . dir s. dir _in:
model_num_classifier = self . load_model ( self . model_dir_of_col_classifier )
if self . input_binary :
@ -506,7 +483,7 @@ class Eynollah():
img_in [ 0 , : , : , 2 ] = img_1ch [ : , : ]
if self . dir _in:
if self . dir s. dir _in:
label_p_pred = self . model_classifier . predict ( img_in , verbose = 0 )
else :
label_p_pred = model_num_classifier . predict ( img_in , verbose = 0 )
@ -896,10 +873,10 @@ class Eynollah():
if not self . ignore_page_extraction :
img = cv2 . GaussianBlur ( self . image , ( 5 , 5 ) , 0 )
if not self . dir _in:
if not self . dir s. dir _in:
model_page = self . load_model ( self . model_page_dir )
if not self . dir _in:
if not self . dir s. dir _in:
img_page_prediction = self . do_prediction ( False , img , model_page )
else :
img_page_prediction = self . do_prediction ( False , img , self . model_page )
@ -944,11 +921,11 @@ class Eynollah():
img = img . astype ( np . uint8 )
else :
img = self . imread ( )
if not self . dir _in:
if not self . dir s. dir _in:
model_page = self . load_model ( self . model_page_dir )
img = cv2 . GaussianBlur ( img , ( 5 , 5 ) , 0 )
if self . dir _in:
if self . dir s. dir _in:
img_page_prediction = self . do_prediction ( False , img , self . model_page )
else :
img_page_prediction = self . do_prediction ( False , img , model_page )
@ -977,7 +954,7 @@ class Eynollah():
self . logger . debug ( " enter extract_text_regions " )
img_height_h = img . shape [ 0 ]
img_width_h = img . shape [ 1 ]
if not self . dir _in:
if not self . dir s. dir _in:
model_region = self . load_model ( self . model_region_dir_fully if patches else self . model_region_dir_fully_np )
else :
model_region = self . model_region_fl if patches else self . model_region_fl_np
@ -1444,19 +1421,19 @@ class Eynollah():
def textline_contours ( self , img , patches , scaler_h , scaler_w ) :
self . logger . debug ( ' enter textline_contours ' )
if not self . dir _in:
if not self . dir s. dir _in:
model_textline = self . load_model ( self . model_textline_dir if patches else self . model_textline_dir_np )
img = img . astype ( np . uint8 )
img_org = np . copy ( img )
img_h = img_org . shape [ 0 ]
img_w = img_org . shape [ 1 ]
img = resize_image ( img_org , int ( img_org . shape [ 0 ] * scaler_h ) , int ( img_org . shape [ 1 ] * scaler_w ) )
if not self . dir _in:
if not self . dir s. dir _in:
prediction_textline = self . do_prediction ( patches , img , model_textline )
else :
prediction_textline = self . do_prediction ( patches , img , self . model_textline )
prediction_textline = resize_image ( prediction_textline , img_h , img_w )
if not self . dir _in:
if not self . dir s. dir _in:
prediction_textline_longshot = self . do_prediction ( False , img , model_textline )
else :
prediction_textline_longshot = self . do_prediction ( False , img , self . model_textline )
@ -1502,6 +1479,7 @@ class Eynollah():
q . put ( slopes_sub )
poly . put ( poly_sub )
box_sub . put ( boxes_sub_new )
def get_regions_light_v ( self , img , is_image_enhanced , num_col_classifier ) :
self . logger . debug ( " enter get_regions_light_v " )
erosion_hurts = False
@ -1536,7 +1514,7 @@ class Eynollah():
img_h_new = int ( img_org . shape [ 0 ] / float ( img_org . shape [ 1 ] ) * img_w_new )
img_resized = resize_image ( img , img_h_new , img_w_new )
if not self . dir _in:
if not self . dir s. dir _in:
model_bin = self . load_model ( self . model_dir_of_binarization )
prediction_bin = self . do_prediction ( True , img_resized , model_bin )
else :
@ -1555,7 +1533,7 @@ class Eynollah():
textline_mask_tot_ea = self . run_textline ( img_bin )
if not self . dir _in:
if not self . dir s. dir _in:
model_region = self . load_model ( self . model_region_dir_p_ens_light )
prediction_regions_org = self . do_prediction_new_concept ( True , img_bin , model_region )
else :
@ -1600,14 +1578,14 @@ class Eynollah():
img_height_h = img_org . shape [ 0 ]
img_width_h = img_org . shape [ 1 ]
if not self . dir _in:
if not self . dir s. dir _in:
model_region = self . load_model ( self . model_region_dir_p_ens )
ratio_y = 1.3
ratio_x = 1
img = resize_image ( img_org , int ( img_org . shape [ 0 ] * ratio_y ) , int ( img_org . shape [ 1 ] * ratio_x ) )
if not self . dir _in:
if not self . dir s. dir _in:
prediction_regions_org_y = self . do_prediction ( True , img , model_region )
else :
prediction_regions_org_y = self . do_prediction ( True , img , self . model_region )
@ -1629,7 +1607,7 @@ class Eynollah():
img = resize_image ( img_org , int ( img_org . shape [ 0 ] ) , int ( img_org . shape [ 1 ] * ( 1.2 if is_image_enhanced else 1 ) ) )
if self . dir _in:
if self . dir s. dir _in:
prediction_regions_org = self . do_prediction ( True , img , self . model_region )
else :
prediction_regions_org = self . do_prediction ( True , img , model_region )
@ -1639,12 +1617,12 @@ class Eynollah():
prediction_regions_org [ ( prediction_regions_org [ : , : ] == 1 ) & ( mask_zeros_y [ : , : ] == 1 ) ] = 0
if not self . dir _in:
if not self . dir s. dir _in:
model_region = self . load_model ( self . model_region_dir_p2 )
img = resize_image ( img_org , int ( img_org . shape [ 0 ] ) , int ( img_org . shape [ 1 ] ) )
if self . dir _in:
if self . dir s. dir _in:
prediction_regions_org2 = self . do_prediction ( True , img , self . model_region_p2 , 0.2 )
else :
prediction_regions_org2 = self . do_prediction ( True , img , model_region , 0.2 )
@ -1678,7 +1656,7 @@ class Eynollah():
if self . input_binary :
prediction_bin = np . copy ( img_org )
else :
if not self . dir _in:
if not self . dir s. dir _in:
model_bin = self . load_model ( self . model_dir_of_binarization )
prediction_bin = self . do_prediction ( True , img_org , model_bin )
else :
@ -1691,7 +1669,7 @@ class Eynollah():
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
if not self . dir _in:
if not self . dir s. dir _in:
model_region = self . load_model ( self . model_region_dir_p_ens )
ratio_y = 1
ratio_x = 1
@ -1699,7 +1677,7 @@ class Eynollah():
img = resize_image ( prediction_bin , int ( img_org . shape [ 0 ] * ratio_y ) , int ( img_org . shape [ 1 ] * ratio_x ) )
if not self . dir _in:
if not self . dir s. dir _in:
prediction_regions_org = self . do_prediction ( True , img , model_region )
else :
prediction_regions_org = self . do_prediction ( True , img , self . model_region )
@ -1731,7 +1709,7 @@ class Eynollah():
if self . input_binary :
prediction_bin = np . copy ( img_org )
if not self . dir _in:
if not self . dir s. dir _in:
model_bin = self . load_model ( self . model_dir_of_binarization )
prediction_bin = self . do_prediction ( True , img_org , model_bin )
else :
@ -1746,7 +1724,7 @@ class Eynollah():
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
if not self . dir _in:
if not self . dir s. dir _in:
model_region = self . load_model ( self . model_region_dir_p_ens )
else :
@ -1756,7 +1734,7 @@ class Eynollah():
img = resize_image ( prediction_bin , int ( img_org . shape [ 0 ] * ratio_y ) , int ( img_org . shape [ 1 ] * ratio_x ) )
if not self . dir _in:
if not self . dir s. dir _in:
prediction_regions_org = self . do_prediction ( True , img , model_region )
else :
prediction_regions_org = self . do_prediction ( True , img , self . model_region )
@ -2755,13 +2733,13 @@ class Eynollah():
t0_tot = time . time ( )
if not self . dir _in:
if not self . dir s. dir _in:
self . ls_imgs = [ 1 ]
for img_name in self . ls_imgs :
t0 = time . time ( )
if self . dir _in:
self . reset_file_name_dir ( join ( self . dir _in, img_name ) )
if self . dir s. dir _in:
self . reset_file_name_dir ( join ( self . dir s. dir _in, img_name ) )
img_res , is_image_enhanced , num_col_classifier , num_column_is_classified = self . run_enhancement ( self . light_version )
self . logger . info ( " Enhancing took %.1f s " , time . time ( ) - t0 )
@ -2789,7 +2767,7 @@ class Eynollah():
self . logger . info ( " No columns detected, outputting an empty PAGE-XML " )
pcgts = self . writer . build_pagexml_no_full_layout ( [ ] , page_coord , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , cont_page , [ ] , [ ] )
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t1 )
if self . dir _in:
if self . dir s. dir _in:
self . writer . write_pagexml ( pcgts )
continue
else :
@ -3017,7 +2995,7 @@ class Eynollah():
pcgts = self . writer . build_pagexml_full_layout ( contours_only_text_parent , contours_only_text_parent_h , page_coord , order_text_new , id_of_texts_tot , all_found_textline_polygons , all_found_textline_polygons_h , all_box_coord , all_box_coord_h , polygons_of_images , contours_tables , polygons_of_drop_capitals , polygons_of_marginals , all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_h , slopes_marginals , cont_page , polygons_lines_xml )
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t0 )
if not self . dir _in:
if not self . dir s. dir _in:
return pcgts
else :
contours_only_text_parent_h = None
@ -3028,11 +3006,11 @@ class Eynollah():
order_text_new , id_of_texts_tot = self . do_order_of_regions ( contours_only_text_parent_d_ordered , contours_only_text_parent_h , boxes_d , textline_mask_tot_d )
pcgts = self . writer . build_pagexml_no_full_layout ( txt_con_org , page_coord , order_text_new , id_of_texts_tot , all_found_textline_polygons , all_box_coord , polygons_of_images , polygons_of_marginals , all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_marginals , cont_page , polygons_lines_xml , contours_tables )
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t0 )
if not self . dir _in:
if not self . dir s. dir _in:
return pcgts
if self . dir _in:
if self . dir s. dir _in:
self . writer . write_pagexml ( pcgts )
#self.logger.info("Job done in %.1fs", time.time() - t0)
if self . dir _in:
if self . dir s. dir _in:
self . logger . info ( " All jobs done in %.1f s " , time . time ( ) - t0_tot )