@ -6,47 +6,57 @@
document layout analysis (segmentation) with output in PAGE-XML
document layout analysis (segmentation) with output in PAGE-XML
"""
"""
import tracemalloc
from logging import Logger
from difflib import SequenceMatcher as sq
import math
import math
import os
import os
import sys
import sys
import time
import time
from typing import Optional
import atexit
import atexit
import warnings
import warnings
from functools import partial
from functools import partial
from pathlib import Path
from pathlib import Path
from multiprocessing import cpu_count
from multiprocessing import cpu_count
from loky import ProcessPoolExecutor
import gc
import gc
from ocrd_utils import getLogger
import copy
import json
from loky import ProcessPoolExecutor
from PIL . Image import Image
import xml . etree . ElementTree as ET
import cv2
import cv2
import numpy as np
import numpy as np
from transformers import TrOCRProcessor
from PIL import Image
import torch
from difflib import SequenceMatcher as sq
from transformers import VisionEncoderDecoderModel
from numba import cuda
import copy
from scipy . signal import find_peaks
from scipy . signal import find_peaks
from scipy . ndimage import gaussian_filter1d
from scipy . ndimage import gaussian_filter1d
from numba import cuda
from ocrd import OcrdPage
from ocrd_utils import getLogger , tf_disable_interactive_logs
try :
import torch
except ImportError :
torch = None
try :
import matplotlib . pyplot as plt
except ImportError :
plt = None
try :
from transformers import TrOCRProcessor , VisionEncoderDecoderModel
except ImportError :
TrOCRProcessor = VisionEncoderDecoderModel = None
os . environ [ " TF_CPP_MIN_LOG_LEVEL " ] = " 3 "
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
stderr = sys . stderr
tf_disable_interactive_logs ( )
sys . stderr = open ( os . devnull , " w " )
import tensorflow as tf
import tensorflow as tf
from tensorflow . python . keras import backend as K
from tensorflow . python . keras import backend as K
from tensorflow . keras . models import load_model
from tensorflow . keras . models import load_model
sys . stderr = stderr
tf . get_logger ( ) . setLevel ( " ERROR " )
tf . get_logger ( ) . setLevel ( " ERROR " )
warnings . filterwarnings ( " ignore " )
warnings . filterwarnings ( " ignore " )
import matplotlib . pyplot as plt
# use tf1 compatibility for keras backend
# use tf1 compatibility for keras backend
from tensorflow . compat . v1 . keras . backend import set_session
from tensorflow . compat . v1 . keras . backend import set_session
from tensorflow . keras import layers
from tensorflow . keras import layers
import json
import xml . etree . ElementTree as ET
from tensorflow . keras . layers import StringLookup
from tensorflow . keras . layers import StringLookup
from . utils . contour import (
from . utils . contour import (
@ -166,54 +176,37 @@ class PatchEncoder(layers.Layer):
class Eynollah :
class Eynollah :
def __init__ (
def __init__ (
self ,
self ,
dir_models ,
dir_models : str ,
image_filename = None ,
dir_out : Optional [ str ] = None ,
image_pil = None ,
dir_of_cropped_images : Optional [ str ] = None ,
image_filename_stem = None ,
extract_only_images : bool = False ,
overwrite = False ,
dir_of_layout : Optional [ str ] = None ,
dir_out = None ,
dir_of_deskewed : Optional [ str ] = None ,
dir_in = None ,
dir_of_all : Optional [ str ] = None ,
dir_of_cropped_images = None ,
dir_save_page : Optional [ str ] = None ,
extract_only_images = False ,
enable_plotting : bool = False ,
dir_of_layout = None ,
allow_enhancement : bool = False ,
dir_of_deskewed = None ,
curved_line : bool = False ,
dir_of_all = None ,
textline_light : bool = False ,
dir_save_page = None ,
full_layout : bool = False ,
enable_plotting = False ,
tables : bool = False ,
allow_enhancement = False ,
right2left : bool = False ,
curved_line = False ,
input_binary : bool = False ,
textline_light = False ,
allow_scaling : bool = False ,
full_layout = False ,
headers_off : bool = False ,
tables = False ,
light_version : bool = False ,
right2left = False ,
ignore_page_extraction : bool = False ,
input_binary = False ,
reading_order_machine_based : bool = False ,
allow_scaling = False ,
do_ocr : bool = False ,
headers_off = False ,
num_col_upper : Optional [ int ] = None ,
light_version = False ,
num_col_lower : Optional [ int ] = None ,
ignore_page_extraction = False ,
skip_layout_and_reading_order : bool = False ,
reading_order_machine_based = False ,
logger : Logger = None ,
do_ocr = False ,
num_col_upper = None ,
num_col_lower = None ,
skip_layout_and_reading_order = False ,
override_dpi = None ,
logger = None ,
pcgts = None ,
) :
) :
if skip_layout_and_reading_order :
if skip_layout_and_reading_order :
textline_light = True
textline_light = True
self . light_version = light_version
self . light_version = light_version
if not dir_in :
if image_pil :
self . _imgs = self . _cache_images ( image_pil = image_pil )
else :
self . _imgs = self . _cache_images ( image_filename = image_filename )
if override_dpi :
self . dpi = override_dpi
self . image_filename = image_filename
self . overwrite = overwrite
self . dir_out = dir_out
self . dir_out = dir_out
self . dir_in = dir_in
self . dir_of_all = dir_of_all
self . dir_of_all = dir_of_all
self . dir_save_page = dir_save_page
self . dir_save_page = dir_save_page
self . reading_order_machine_based = reading_order_machine_based
self . reading_order_machine_based = reading_order_machine_based
@ -244,22 +237,6 @@ class Eynollah:
self . num_col_lower = int ( num_col_lower )
self . num_col_lower = int ( num_col_lower )
else :
else :
self . num_col_lower = num_col_lower
self . num_col_lower = num_col_lower
self . pcgts = pcgts
if not dir_in :
self . plotter = None if not enable_plotting else EynollahPlotter (
dir_out = self . dir_out ,
dir_of_all = dir_of_all ,
dir_save_page = dir_save_page ,
dir_of_deskewed = dir_of_deskewed ,
dir_of_cropped_images = dir_of_cropped_images ,
dir_of_layout = dir_of_layout ,
image_filename_stem = Path ( Path ( image_filename ) . name ) . stem )
self . writer = EynollahXmlWriter (
dir_out = self . dir_out ,
image_filename = self . image_filename ,
curved_line = self . curved_line ,
textline_light = self . textline_light ,
pcgts = pcgts )
self . logger = logger if logger else getLogger ( ' eynollah ' )
self . logger = logger if logger else getLogger ( ' eynollah ' )
# for parallelization of CPU-intensive tasks:
# for parallelization of CPU-intensive tasks:
self . executor = ProcessPoolExecutor ( max_workers = cpu_count ( ) , timeout = 1200 )
self . executor = ProcessPoolExecutor ( max_workers = cpu_count ( ) , timeout = 1200 )
@ -311,21 +288,25 @@ class Eynollah:
self . model_textline_dir = dir_models + " /modelens_textline_0_1__2_4_16092024 "
self . model_textline_dir = dir_models + " /modelens_textline_0_1__2_4_16092024 "
if self . ocr :
if self . ocr :
self . model_ocr_dir = dir_models + " /trocr_model_ens_of_3_checkpoints_201124 "
self . model_ocr_dir = dir_models + " /trocr_model_ens_of_3_checkpoints_201124 "
if self . tables :
if self . tables :
if self . light_version :
if self . light_version :
self . model_table_dir = dir_models + " /modelens_table_0t4_201124 "
self . model_table_dir = dir_models + " /modelens_table_0t4_201124 "
else :
else :
self . model_table_dir = dir_models + " /eynollah-tables_20210319 "
self . model_table_dir = dir_models + " /eynollah-tables_20210319 "
self . models = { }
# #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
# #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
if dir_in :
# #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
# as in start_new_session:
# config = tf.compat.v1.ConfigProto()
config = tf . compat . v1 . ConfigProto ( )
# config.gpu_options.allow_growth = True
config . gpu_options . allow_growth = True
# #session = tf.InteractiveSession()
session = tf . compat . v1 . Session ( config = config )
# session = tf.compat.v1.Session(config=config)
set_session ( session )
# set_session(session)
try :
for device in tf . config . list_physical_devices ( ' GPU ' ) :
tf . config . experimental . set_memory_growth ( device , True )
except :
self . logger . warning ( " no GPU device available " )
self . model_page = self . our_load_model ( self . model_page_dir )
self . model_page = self . our_load_model ( self . model_page_dir )
self . model_classifier = self . our_load_model ( self . model_dir_of_col_classifier )
self . model_classifier = self . our_load_model ( self . model_dir_of_col_classifier )
@ -354,9 +335,7 @@ class Eynollah:
if self . tables :
if self . tables :
self . model_table = self . our_load_model ( self . model_table_dir )
self . model_table = self . our_load_model ( self . model_table_dir )
self . ls_imgs = os . listdir ( self . dir_in )
def cache_images ( self , image_filename = None , image_pil = None , dpi = None ) :
def _cache_images ( self , image_filename = None , image_pil = None ) :
ret = { }
ret = { }
t_c0 = time . time ( )
t_c0 = time . time ( )
if image_filename :
if image_filename :
@ -374,12 +353,13 @@ class Eynollah:
ret [ ' img_grayscale ' ] = cv2 . cvtColor ( ret [ ' img ' ] , cv2 . COLOR_BGR2GRAY )
ret [ ' img_grayscale ' ] = cv2 . cvtColor ( ret [ ' img ' ] , cv2 . COLOR_BGR2GRAY )
for prefix in ( ' ' , ' _grayscale ' ) :
for prefix in ( ' ' , ' _grayscale ' ) :
ret [ f ' img { prefix } _uint8 ' ] = ret [ f ' img { prefix } ' ] . astype ( np . uint8 )
ret [ f ' img { prefix } _uint8 ' ] = ret [ f ' img { prefix } ' ] . astype ( np . uint8 )
return ret
self . _imgs = ret
if dpi is not None :
self . dpi = dpi
def reset_file_name_dir ( self , image_filename ) :
def reset_file_name_dir ( self , image_filename ) :
t_c = time . time ( )
t_c = time . time ( )
self . _imgs = self . _cache_images ( image_filename = image_filename )
self . cache_images ( image_filename = image_filename )
self . image_filename = image_filename
self . plotter = None if not self . enable_plotting else EynollahPlotter (
self . plotter = None if not self . enable_plotting else EynollahPlotter (
dir_out = self . dir_out ,
dir_out = self . dir_out ,
@ -392,10 +372,9 @@ class Eynollah:
self . writer = EynollahXmlWriter (
self . writer = EynollahXmlWriter (
dir_out = self . dir_out ,
dir_out = self . dir_out ,
image_filename = self . image_filename ,
image_filename = image_filename ,
curved_line = self . curved_line ,
curved_line = self . curved_line ,
textline_light = self . textline_light ,
textline_light = self . textline_light )
pcgts = self . pcgts )
def imread ( self , grayscale = False , uint8 = True ) :
def imread ( self , grayscale = False , uint8 = True ) :
key = ' img '
key = ' img '
@ -410,8 +389,6 @@ class Eynollah:
def predict_enhancement ( self , img ) :
def predict_enhancement ( self , img ) :
self . logger . debug ( " enter predict_enhancement " )
self . logger . debug ( " enter predict_enhancement " )
if not self . dir_in :
self . model_enhancement , _ = self . start_new_session_and_model ( self . model_dir_of_enhancement )
img_height_model = self . model_enhancement . layers [ - 1 ] . output_shape [ 1 ]
img_height_model = self . model_enhancement . layers [ - 1 ] . output_shape [ 1 ]
img_width_model = self . model_enhancement . layers [ - 1 ] . output_shape [ 2 ]
img_width_model = self . model_enhancement . layers [ - 1 ] . output_shape [ 2 ]
@ -609,9 +586,6 @@ class Eynollah:
_ , page_coord = self . early_page_for_num_of_column_classification ( img )
_ , page_coord = self . early_page_for_num_of_column_classification ( img )
if not self . dir_in :
self . model_classifier , _ = self . start_new_session_and_model ( self . model_dir_of_col_classifier )
if self . input_binary :
if self . input_binary :
img_in = np . copy ( img )
img_in = np . copy ( img )
img_in = img_in / 255.0
img_in = img_in / 255.0
@ -651,9 +625,6 @@ class Eynollah:
self . logger . info ( " Detected %s DPI " , dpi )
self . logger . info ( " Detected %s DPI " , dpi )
if self . input_binary :
if self . input_binary :
img = self . imread ( )
img = self . imread ( )
if not self . dir_in :
self . model_bin , _ = self . start_new_session_and_model ( self . model_dir_of_binarization )
prediction_bin = self . do_prediction ( True , img , self . model_bin , n_batch_inference = 5 )
prediction_bin = self . do_prediction ( True , img , self . model_bin , n_batch_inference = 5 )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 ) . astype ( np . uint8 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 ) . astype ( np . uint8 )
@ -670,9 +641,6 @@ class Eynollah:
self . image_page_org_size = img [ page_coord [ 0 ] : page_coord [ 1 ] , page_coord [ 2 ] : page_coord [ 3 ] , : ]
self . image_page_org_size = img [ page_coord [ 0 ] : page_coord [ 1 ] , page_coord [ 2 ] : page_coord [ 3 ] , : ]
self . page_coord = page_coord
self . page_coord = page_coord
if not self . dir_in :
self . model_classifier , _ = self . start_new_session_and_model ( self . model_dir_of_col_classifier )
if self . num_col_upper and not self . num_col_lower :
if self . num_col_upper and not self . num_col_lower :
num_col = self . num_col_upper
num_col = self . num_col_upper
label_p_pred = [ np . ones ( 6 ) ]
label_p_pred = [ np . ones ( 6 ) ]
@ -812,43 +780,6 @@ class Eynollah:
self . writer . height_org = self . height_org
self . writer . height_org = self . height_org
self . writer . width_org = self . width_org
self . writer . width_org = self . width_org
def start_new_session_and_model_old(self, model_dir):
    """Open an interactive TensorFlow session and load a Keras model.

    Legacy variant of ``start_new_session_and_model`` that creates a session
    for every call and does not cache the loaded model.

    Args:
        model_dir: Path handed to ``load_model``.

    Returns:
        A ``(model, session)`` tuple; the model is loaded with
        ``compile=False``.
    """
    self.logger.debug(" enter start_new_session_and_model (model_dir= %s ) ", model_dir)
    # The session-based API lives under tf.compat.v1, which is what the rest
    # of this module uses as well.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    # The config has to be handed to the session, otherwise allow_growth
    # never takes effect.
    session = tf.compat.v1.InteractiveSession(config=config)
    model = load_model(model_dir, compile=False)
    return model, session
def start_new_session_and_model(self, model_dir):
    """Return the Keras model stored at ``model_dir``, loading it at most once.

    Despite the name, no session is created here: memory growth is requested
    for every GPU reported by TensorFlow, and loaded models are kept in
    ``self.models`` keyed by their (possibly adjusted) path.

    Args:
        model_dir: Model path. When it ends in ``.h5`` and the same path
            without that suffix exists, the suffix-less path is used instead.

    Returns:
        A ``(model, None)`` tuple; the second slot is always ``None``.
    """
    self.logger.debug(" enter start_new_session_and_model (model_dir= %s ) ", model_dir)
    physical_devices = tf.config.list_physical_devices("GPU")
    try:
        for device in physical_devices:
            tf.config.experimental.set_memory_growth(device, True)
    except Exception:
        # Best effort: a failure to enable memory growth is logged and
        # otherwise ignored. Catching Exception (not a bare except) keeps
        # KeyboardInterrupt / SystemExit propagating.
        self.logger.warning(" no GPU device available ")

    if model_dir.endswith(".h5") and Path(model_dir[:-3]).exists():
        # prefer SavedModel over HDF5 format if it exists
        model_dir = model_dir[:-3]

    if model_dir in self.models:
        return self.models[model_dir], None

    try:
        model = load_model(model_dir, compile=False)
    except Exception:
        # Retry with the custom layer classes registered for deserialization.
        model = load_model(
            model_dir,
            compile=False,
            custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches},
        )
    self.models[model_dir] = model
    return model, None
def do_prediction (
def do_prediction (
self , patches , img , model ,
self , patches , img , model ,
n_batch_inference = 1 , marginal_of_patch_percent = 0.1 ,
n_batch_inference = 1 , marginal_of_patch_percent = 0.1 ,
@ -1386,9 +1317,6 @@ class Eynollah:
self . logger . debug ( " enter extract_page " )
self . logger . debug ( " enter extract_page " )
cont_page = [ ]
cont_page = [ ]
if not self . ignore_page_extraction :
if not self . ignore_page_extraction :
if not self . dir_in :
self . model_page , _ = self . start_new_session_and_model ( self . model_page_dir )
img = cv2 . GaussianBlur ( self . image , ( 5 , 5 ) , 0 )
img = cv2 . GaussianBlur ( self . image , ( 5 , 5 ) , 0 )
img_page_prediction = self . do_prediction ( False , img , self . model_page )
img_page_prediction = self . do_prediction ( False , img , self . model_page )
imgray = cv2 . cvtColor ( img_page_prediction , cv2 . COLOR_BGR2GRAY )
imgray = cv2 . cvtColor ( img_page_prediction , cv2 . COLOR_BGR2GRAY )
@ -1436,8 +1364,6 @@ class Eynollah:
img = np . copy ( img_bin ) . astype ( np . uint8 )
img = np . copy ( img_bin ) . astype ( np . uint8 )
else :
else :
img = self . imread ( )
img = self . imread ( )
if not self . dir_in :
self . model_page , _ = self . start_new_session_and_model ( self . model_page_dir )
img = cv2 . GaussianBlur ( img , ( 5 , 5 ) , 0 )
img = cv2 . GaussianBlur ( img , ( 5 , 5 ) , 0 )
img_page_prediction = self . do_prediction ( False , img , self . model_page )
img_page_prediction = self . do_prediction ( False , img , self . model_page )
@ -1465,11 +1391,6 @@ class Eynollah:
self . logger . debug ( " enter extract_text_regions " )
self . logger . debug ( " enter extract_text_regions " )
img_height_h = img . shape [ 0 ]
img_height_h = img . shape [ 0 ]
img_width_h = img . shape [ 1 ]
img_width_h = img . shape [ 1 ]
if not self . dir_in :
if patches :
self . model_region_fl , _ = self . start_new_session_and_model ( self . model_region_dir_fully )
else :
self . model_region_fl_np , _ = self . start_new_session_and_model ( self . model_region_dir_fully_np )
model_region = self . model_region_fl if patches else self . model_region_fl_np
model_region = self . model_region_fl if patches else self . model_region_fl_np
if self . light_version :
if self . light_version :
@ -1501,11 +1422,6 @@ class Eynollah:
self . logger . debug ( " enter extract_text_regions " )
self . logger . debug ( " enter extract_text_regions " )
img_height_h = img . shape [ 0 ]
img_height_h = img . shape [ 0 ]
img_width_h = img . shape [ 1 ]
img_width_h = img . shape [ 1 ]
if not self . dir_in :
if patches :
self . model_region_fl , _ = self . start_new_session_and_model ( self . model_region_dir_fully )
else :
self . model_region_fl_np , _ = self . start_new_session_and_model ( self . model_region_dir_fully_np )
model_region = self . model_region_fl if patches else self . model_region_fl_np
model_region = self . model_region_fl if patches else self . model_region_fl_np
if not patches :
if not patches :
@ -1636,8 +1552,6 @@ class Eynollah:
def textline_contours ( self , img , use_patches , scaler_h , scaler_w , num_col_classifier = None ) :
def textline_contours ( self , img , use_patches , scaler_h , scaler_w , num_col_classifier = None ) :
self . logger . debug ( ' enter textline_contours ' )
self . logger . debug ( ' enter textline_contours ' )
if not self . dir_in :
self . model_textline , _ = self . start_new_session_and_model ( self . model_textline_dir )
#img = img.astype(np.uint8)
#img = img.astype(np.uint8)
img_org = np . copy ( img )
img_org = np . copy ( img )
@ -1739,9 +1653,6 @@ class Eynollah:
img_h_new = int ( img . shape [ 0 ] / float ( img . shape [ 1 ] ) * img_w_new )
img_h_new = int ( img . shape [ 0 ] / float ( img . shape [ 1 ] ) * img_w_new )
img_resized = resize_image ( img , img_h_new , img_w_new )
img_resized = resize_image ( img , img_h_new , img_w_new )
if not self . dir_in :
self . model_region , _ = self . start_new_session_and_model ( self . model_region_dir_p_ens_light_only_images_extraction )
prediction_regions_org = self . do_prediction_new_concept ( True , img_resized , self . model_region )
prediction_regions_org = self . do_prediction_new_concept ( True , img_resized , self . model_region )
prediction_regions_org = resize_image ( prediction_regions_org , img_height_h , img_width_h )
prediction_regions_org = resize_image ( prediction_regions_org , img_height_h , img_width_h )
@ -1830,7 +1741,6 @@ class Eynollah:
img_height_h = img_org . shape [ 0 ]
img_height_h = img_org . shape [ 0 ]
img_width_h = img_org . shape [ 1 ]
img_width_h = img_org . shape [ 1 ]
#model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens)
#print(num_col_classifier,'num_col_classifier')
#print(num_col_classifier,'num_col_classifier')
if num_col_classifier == 1 :
if num_col_classifier == 1 :
@ -1853,8 +1763,6 @@ class Eynollah:
#if self.input_binary:
#if self.input_binary:
#img_bin = np.copy(img_resized)
#img_bin = np.copy(img_resized)
###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
###if (not self.input_binary and self.full_layout) or (not self.input_binary and num_col_classifier >= 30):
###if not self.dir_in:
###self.model_bin, _ = self.start_new_session_and_model(self.model_dir_of_binarization)
###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
###prediction_bin = self.do_prediction(True, img_resized, self.model_bin, n_batch_inference=5)
####print("inside bin ", time.time()-t_bin)
####print("inside bin ", time.time()-t_bin)
@ -1870,8 +1778,6 @@ class Eynollah:
###else:
###else:
###img_bin = np.copy(img_resized)
###img_bin = np.copy(img_resized)
if self . ocr and not self . input_binary :
if self . ocr and not self . input_binary :
if not self . dir_in :
self . model_bin , _ = self . start_new_session_and_model ( self . model_dir_of_binarization )
prediction_bin = self . do_prediction ( True , img_resized , self . model_bin , n_batch_inference = 5 )
prediction_bin = self . do_prediction ( True , img_resized , self . model_bin , n_batch_inference = 5 )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
@ -1894,12 +1800,7 @@ class Eynollah:
#plt.show()
#plt.show()
if not skip_layout_and_reading_order :
if not skip_layout_and_reading_order :
#print("inside 2 ", time.time()-t_in)
#print("inside 2 ", time.time()-t_in)
if not self . dir_in :
self . model_region_1_2 , _ = self . start_new_session_and_model ( self . model_region_dir_p_1_2_sp_np )
##self.model_region, _ = self.start_new_session_and_model(self.model_region_dir_p_ens_light)
if num_col_classifier == 1 or num_col_classifier == 2 :
if num_col_classifier == 1 or num_col_classifier == 2 :
model_region , session_region = self . start_new_session_and_model ( self . model_region_dir_p_1_2_sp_np )
if self . image_org . shape [ 0 ] / self . image_org . shape [ 1 ] > 2.5 :
if self . image_org . shape [ 0 ] / self . image_org . shape [ 1 ] > 2.5 :
self . logger . debug ( " resized to %d x %d for %d cols " ,
self . logger . debug ( " resized to %d x %d for %d cols " ,
img_resized . shape [ 1 ] , img_resized . shape [ 0 ] , num_col_classifier )
img_resized . shape [ 1 ] , img_resized . shape [ 0 ] , num_col_classifier )
@ -1998,9 +1899,6 @@ class Eynollah:
img_height_h = img_org . shape [ 0 ]
img_height_h = img_org . shape [ 0 ]
img_width_h = img_org . shape [ 1 ]
img_width_h = img_org . shape [ 1 ]
if not self . dir_in :
self . model_region , _ = self . start_new_session_and_model ( self . model_region_dir_p_ens )
ratio_y = 1.3
ratio_y = 1.3
ratio_x = 1
ratio_x = 1
@ -2026,9 +1924,6 @@ class Eynollah:
prediction_regions_org = prediction_regions_org [ : , : , 0 ]
prediction_regions_org = prediction_regions_org [ : , : , 0 ]
prediction_regions_org [ ( prediction_regions_org [ : , : ] == 1 ) & ( mask_zeros_y [ : , : ] == 1 ) ] = 0
prediction_regions_org [ ( prediction_regions_org [ : , : ] == 1 ) & ( mask_zeros_y [ : , : ] == 1 ) ] = 0
if not self . dir_in :
self . model_region_p2 , _ = self . start_new_session_and_model ( self . model_region_dir_p2 )
img = resize_image ( img_org , int ( img_org . shape [ 0 ] ) , int ( img_org . shape [ 1 ] ) )
img = resize_image ( img_org , int ( img_org . shape [ 0 ] ) , int ( img_org . shape [ 1 ] ) )
prediction_regions_org2 = self . do_prediction ( True , img , self . model_region_p2 , marginal_of_patch_percent = 0.2 )
prediction_regions_org2 = self . do_prediction ( True , img , self . model_region_p2 , marginal_of_patch_percent = 0.2 )
@ -2055,15 +1950,11 @@ class Eynollah:
if self . input_binary :
if self . input_binary :
prediction_bin = np . copy ( img_org )
prediction_bin = np . copy ( img_org )
else :
else :
if not self . dir_in :
self . model_bin , _ = self . start_new_session_and_model ( self . model_dir_of_binarization )
prediction_bin = self . do_prediction ( True , img_org , self . model_bin , n_batch_inference = 5 )
prediction_bin = self . do_prediction ( True , img_org , self . model_bin , n_batch_inference = 5 )
prediction_bin = resize_image ( prediction_bin , img_height_h , img_width_h )
prediction_bin = resize_image ( prediction_bin , img_height_h , img_width_h )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
if not self . dir_in :
self . model_region , _ = self . start_new_session_and_model ( self . model_region_dir_p_ens )
ratio_y = 1
ratio_y = 1
ratio_x = 1
ratio_x = 1
@ -2096,17 +1987,10 @@ class Eynollah:
except :
except :
if self . input_binary :
if self . input_binary :
prediction_bin = np . copy ( img_org )
prediction_bin = np . copy ( img_org )
if not self . dir_in :
self . model_bin , _ = self . start_new_session_and_model ( self . model_dir_of_binarization )
prediction_bin = self . do_prediction ( True , img_org , self . model_bin , n_batch_inference = 5 )
prediction_bin = self . do_prediction ( True , img_org , self . model_bin , n_batch_inference = 5 )
prediction_bin = resize_image ( prediction_bin , img_height_h , img_width_h )
prediction_bin = resize_image ( prediction_bin , img_height_h , img_width_h )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = 255 * ( prediction_bin [ : , : , 0 ] == 0 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
prediction_bin = np . repeat ( prediction_bin [ : , : , np . newaxis ] , 3 , axis = 2 )
if not self . dir_in :
self . model_region , _ = self . start_new_session_and_model ( self . model_region_dir_p_ens )
else :
else :
prediction_bin = np . copy ( img_org )
prediction_bin = np . copy ( img_org )
ratio_y = 1
ratio_y = 1
@ -2736,10 +2620,6 @@ class Eynollah:
img_org = np . copy ( img )
img_org = np . copy ( img )
img_height_h = img_org . shape [ 0 ]
img_height_h = img_org . shape [ 0 ]
img_width_h = img_org . shape [ 1 ]
img_width_h = img_org . shape [ 1 ]
if not self . dir_in :
self . model_table , _ = self . start_new_session_and_model ( self . model_table_dir )
patches = False
patches = False
if self . light_version :
if self . light_version :
prediction_table = self . do_prediction_new_concept ( patches , img , self . model_table )
prediction_table = self . do_prediction_new_concept ( patches , img , self . model_table )
@ -3376,7 +3256,11 @@ class Eynollah:
regions_without_separators_d , regions_fully , regions_without_separators ,
regions_without_separators_d , regions_fully , regions_without_separators ,
polygons_of_marginals , contours_tables )
polygons_of_marginals , contours_tables )
def our_load_model ( self , model_file ) :
@staticmethod
def our_load_model ( model_file ) :
if model_file . endswith ( ' .h5 ' ) and Path ( model_file [ : - 3 ] ) . exists ( ) :
# prefer SavedModel over HDF5 format if it exists
model_file = model_file [ : - 3 ]
try :
try :
model = load_model ( model_file , compile = False )
model = load_model ( model_file , compile = False )
except :
except :
@ -3427,9 +3311,6 @@ class Eynollah:
img_header_and_sep = resize_image ( img_header_and_sep , height1 , width1 )
img_header_and_sep = resize_image ( img_header_and_sep , height1 , width1 )
img_poly = resize_image ( img_poly , height3 , width3 )
img_poly = resize_image ( img_poly , height3 , width3 )
if not self . dir_in :
self . model_reading_order , _ = self . start_new_session_and_model ( self . model_reading_order_dir )
inference_bs = 3
inference_bs = 3
input_1 = np . zeros ( ( inference_bs , height1 , width1 , 3 ) )
input_1 = np . zeros ( ( inference_bs , height1 , width1 , 3 ) )
ordered = [ list ( range ( len ( co_text_all ) ) ) ]
ordered = [ list ( range ( len ( co_text_all ) ) ) ]
@ -3730,7 +3611,7 @@ class Eynollah:
for ij in range ( len ( all_found_textline_polygons [ j ] ) ) :
for ij in range ( len ( all_found_textline_polygons [ j ] ) ) :
con_ind = all_found_textline_polygons [ j ] [ ij ]
con_ind = all_found_textline_polygons [ j ] [ ij ]
area = cv2 . contourArea ( con_ind )
area = cv2 . contourArea ( con_ind )
con_ind = con_ind . astype ( np . float)
con_ind = con_ind . astype ( float)
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
@ -3834,7 +3715,7 @@ class Eynollah:
con_ind = all_found_textline_polygons [ j ]
con_ind = all_found_textline_polygons [ j ]
#print(len(con_ind[:,0,0]),'con_ind[:,0,0]')
#print(len(con_ind[:,0,0]),'con_ind[:,0,0]')
area = cv2 . contourArea ( con_ind )
area = cv2 . contourArea ( con_ind )
con_ind = con_ind . astype ( np . float)
con_ind = con_ind . astype ( float)
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
@ -3937,7 +3818,7 @@ class Eynollah:
con_ind = all_found_textline_polygons [ j ] [ ij ]
con_ind = all_found_textline_polygons [ j ] [ ij ]
area = cv2 . contourArea ( con_ind )
area = cv2 . contourArea ( con_ind )
con_ind = con_ind . astype ( np . float)
con_ind = con_ind . astype ( float)
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
@ -4080,10 +3961,8 @@ class Eynollah:
ind_textline_inside_tr = list ( range ( len ( contours [ jj ] ) ) )
ind_textline_inside_tr = list ( range ( len ( contours [ jj ] ) ) )
index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr
index_textline_inside_textregion = index_textline_inside_textregion + ind_textline_inside_tr
#ind_ins = [0] * len(contours[jj]) + jj
ind_ins = [ jj ] * len ( contours [ jj ] )
ind_ins = np . zeros ( len ( contours [ jj ] ) ) + jj
indexes_of_textline_tot = indexes_of_textline_tot + ind_ins
list_ind_ins = list ( ind_ins )
indexes_of_textline_tot = indexes_of_textline_tot + list_ind_ins
M_main_tot = [ cv2 . moments ( contours_txtline_of_all_textregions [ j ] )
M_main_tot = [ cv2 . moments ( contours_txtline_of_all_textregions [ j ] )
for j in range ( len ( contours_txtline_of_all_textregions ) ) ]
for j in range ( len ( contours_txtline_of_all_textregions ) ) ]
@ -4171,7 +4050,7 @@ class Eynollah:
for j in range ( len ( all_found_textline_polygons ) ) :
for j in range ( len ( all_found_textline_polygons ) ) :
for i in range ( len ( all_found_textline_polygons [ j ] ) ) :
for i in range ( len ( all_found_textline_polygons [ j ] ) ) :
con_ind = all_found_textline_polygons [ j ] [ i ]
con_ind = all_found_textline_polygons [ j ] [ i ]
con_ind = con_ind . astype ( np . float)
con_ind = con_ind . astype ( float)
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
x_differential = np . diff ( con_ind [ : , 0 , 0 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
y_differential = np . diff ( con_ind [ : , 0 , 1 ] )
@ -4311,31 +4190,44 @@ class Eynollah:
return ( slopes_rem , all_found_textline_polygons_rem , boxes_text_rem , txt_con_org_rem ,
return ( slopes_rem , all_found_textline_polygons_rem , boxes_text_rem , txt_con_org_rem ,
contours_only_text_parent_rem , index_by_text_par_con_rem_sort )
contours_only_text_parent_rem , index_by_text_par_con_rem_sort )
def run(self, image_filename: Optional[str] = None, dir_in: Optional[str] = None,
        overwrite: bool = False) -> None:
    """
    Get image and scales, then extract the page of scanned image.

    Processes either every entry of a directory or one single image, calling
    ``self.run_single()`` for each input and writing its result through
    ``self.writer.write_pagexml()``.

    Args:
        image_filename: path of a single image; only used when ``dir_in``
            is not given.
        dir_in: directory whose entries are all processed. Takes precedence
            over ``image_filename``. Entries are not filtered by file type.
        overwrite: when False, inputs whose output file already exists are
            skipped; when True they are processed again and overwritten.

    Raises:
        ValueError: if neither ``image_filename`` nor ``dir_in`` is given.
    """
    self.logger.debug("enter run")
    t0_tot = time.time()

    if dir_in:
        # os.listdir returns entries in arbitrary order; sort them so that
        # batch runs are processed (and logged) deterministically.
        self.ls_imgs = sorted(os.listdir(dir_in))
    elif image_filename:
        self.ls_imgs = [image_filename]
    else:
        raise ValueError("run requires either a single image filename or a directory")

    for img_filename in self.ls_imgs:
        self.logger.info(img_filename)
        t0 = time.time()

        # In single-image mode dir_in is None, so join against "" to keep
        # the filename exactly as it was passed in.
        # NOTE(review): self.writer.output_filename is presumably refreshed
        # by reset_file_name_dir -- confirm against that method.
        self.reset_file_name_dir(os.path.join(dir_in or "", img_filename))

        if os.path.exists(self.writer.output_filename):
            if not overwrite:
                self.logger.warning("will skip input for existing output file '%s'",
                                    self.writer.output_filename)
                continue
            self.logger.warning("will overwrite existing output file '%s'",
                                self.writer.output_filename)

        pcgts = self.run_single()
        self.logger.info("Job done in %.1fs", time.time() - t0)
        self.writer.write_pagexml(pcgts)

    if dir_in:
        elapsed_total = time.time() - t0_tot
        self.logger.info("All jobs done in %.1fs", elapsed_total)
        # print() does not interpolate logging-style arguments, so the
        # duration has to be formatted explicitly.
        print("all Job done in %.1fs" % elapsed_total)
def run_single ( self ) :
t0 = time . time ( )
img_res , is_image_enhanced , num_col_classifier , num_column_is_classified = self . run_enhancement ( self . light_version )
img_res , is_image_enhanced , num_col_classifier , num_column_is_classified = self . run_enhancement ( self . light_version )
self . logger . info ( " Enhancing took %.1f s " , time . time ( ) - t0 )
self . logger . info ( " Enhancing took %.1f s " , time . time ( ) - t0 )
if self . extract_only_images :
if self . extract_only_images :
@ -4348,11 +4240,6 @@ class Eynollah:
cont_page , [ ] , [ ] , ocr_all_textlines )
cont_page , [ ] , [ ] , ocr_all_textlines )
if self . plotter :
if self . plotter :
self . plotter . write_images_into_directory ( polygons_of_images , image_page )
self . plotter . write_images_into_directory ( polygons_of_images , image_page )
if self . dir_in :
self . writer . write_pagexml ( pcgts )
continue
else :
return pcgts
return pcgts
if self . skip_layout_and_reading_order :
if self . skip_layout_and_reading_order :
@ -4395,10 +4282,6 @@ class Eynollah:
all_found_textline_polygons , page_coord , polygons_of_images , polygons_of_marginals ,
all_found_textline_polygons , page_coord , polygons_of_images , polygons_of_marginals ,
all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_marginals ,
all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_marginals ,
cont_page , polygons_lines_xml , contours_tables , ocr_all_textlines )
cont_page , polygons_lines_xml , contours_tables , ocr_all_textlines )
if self . dir_in :
self . writer . write_pagexml ( pcgts )
continue
else :
return pcgts
return pcgts
#print("text region early -1 in %.1fs", time.time() - t0)
#print("text region early -1 in %.1fs", time.time() - t0)
@ -4451,11 +4334,6 @@ class Eynollah:
pcgts = self . writer . build_pagexml_no_full_layout (
pcgts = self . writer . build_pagexml_no_full_layout (
[ ] , page_coord , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] ,
[ ] , page_coord , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] , [ ] ,
cont_page , [ ] , [ ] , ocr_all_textlines )
cont_page , [ ] , [ ] , ocr_all_textlines )
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t1 )
if self . dir_in :
self . writer . write_pagexml ( pcgts )
continue
else :
return pcgts
return pcgts
#print("text region early in %.1fs", time.time() - t0)
#print("text region early in %.1fs", time.time() - t0)
@ -4641,11 +4519,6 @@ class Eynollah:
polygons_of_images ,
polygons_of_images ,
polygons_of_marginals , empty_marginals , empty_marginals , [ ] , [ ] ,
polygons_of_marginals , empty_marginals , empty_marginals , [ ] , [ ] ,
cont_page , polygons_lines_xml , contours_tables , [ ] )
cont_page , polygons_lines_xml , contours_tables , [ ] )
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t0 )
if self . dir_in :
self . writer . write_pagexml ( pcgts )
continue
else :
return pcgts
return pcgts
#print("text region early 3 in %.1fs", time.time() - t0)
#print("text region early 3 in %.1fs", time.time() - t0)
@ -4836,15 +4709,8 @@ class Eynollah:
polygons_of_images , contours_tables , polygons_of_drop_capitals , polygons_of_marginals ,
polygons_of_images , contours_tables , polygons_of_drop_capitals , polygons_of_marginals ,
all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_h , slopes_marginals ,
all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_h , slopes_marginals ,
cont_page , polygons_lines_xml , ocr_all_textlines )
cont_page , polygons_lines_xml , ocr_all_textlines )
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t0 )
#print("Job done in %.1fs", time.time() - t0)
if self . dir_in :
self . writer . write_pagexml ( pcgts )
continue
else :
return pcgts
return pcgts
else :
contours_only_text_parent_h = None
contours_only_text_parent_h = None
if self . reading_order_machine_based :
if self . reading_order_machine_based :
order_text_new , id_of_texts_tot = self . do_order_of_regions_with_model (
order_text_new , id_of_texts_tot = self . do_order_of_regions_with_model (
@ -4922,20 +4788,7 @@ class Eynollah:
all_found_textline_polygons , all_box_coord , polygons_of_images , polygons_of_marginals ,
all_found_textline_polygons , all_box_coord , polygons_of_images , polygons_of_marginals ,
all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_marginals ,
all_found_textline_polygons_marginals , all_box_coord_marginals , slopes , slopes_marginals ,
cont_page , polygons_lines_xml , contours_tables , ocr_all_textlines )
cont_page , polygons_lines_xml , contours_tables , ocr_all_textlines )
#print("Job done in %.1fs" % (time.time() - t0))
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t0 )
if not self . dir_in :
return pcgts
return pcgts
#print("text region early 7 in %.1fs", time.time() - t0)
if self . dir_in :
self . writer . write_pagexml ( pcgts )
self . logger . info ( " Job done in %.1f s " , time . time ( ) - t0 )
#print("Job done in %.1fs" % (time.time() - t0))
if self . dir_in :
self . logger . info ( " All jobs done in %.1f s " , time . time ( ) - t0_tot )
print ( " all Job done in %.1f s " , time . time ( ) - t0_tot )
class Eynollah_ocr :
class Eynollah_ocr :