Merge remote-tracking branch 'michalbubula/add-feedback' into prepare-release-v0.5.0

kba 2025-09-23 19:50:27 +02:00
commit 9ead58b99a
3 changed files with 187 additions and 25 deletions

.gitignore vendored
View file

@@ -5,3 +5,4 @@ models_eynollah*
 output.html
 /build
 /dist
+*.tif

View file

@@ -1,5 +1,6 @@
 import sys
 import click
+import logging
 from ocrd_utils import initLogging, getLevelName, getLogger
 from eynollah.eynollah import Eynollah, Eynollah_ocr
 from eynollah.sbb_binarize import SbbBinarizer
@@ -335,15 +336,30 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
     is_flag=True,
     help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.",
 )
+# TODO move to top-level CLI context
 @click.option(
     "--log_level",
     "-l",
     type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
-    help="Override log level globally to this",
+    help="Override 'eynollah' log level globally to this",
+)
+#
+@click.option(
+    "--setup-logging",
+    is_flag=True,
+    help="Setup a basic console logger",
 )
-def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level):
+def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
-    initLogging()
+    if setup_logging:
+        console_handler = logging.StreamHandler(sys.stdout)
+        console_handler.setLevel(logging.INFO)
+        formatter = logging.Formatter('%(message)s')
+        console_handler.setFormatter(formatter)
+        getLogger('eynollah').addHandler(console_handler)
+        getLogger('eynollah').setLevel(logging.INFO)
+    else:
+        initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
     assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
@@ -367,7 +383,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     assert image or dir_in, "Either a single image -i or a dir_in -di is required"
     eynollah = Eynollah(
         model,
-        logger=getLogger('eynollah'),
         dir_out=out,
         dir_of_cropped_images=save_images,
         extract_only_images=extract_only_images,
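The console handler wired up behind the new --setup-logging flag can be reproduced outside of Click with nothing but the standard logging module; the following standalone sketch (illustrative only, not part of the committed files) shows the resulting logger configuration:

import logging
import sys

# Mirror of the --setup-logging branch added in cli.py: a plain stdout handler
# on the shared 'eynollah' logger, message-only format, INFO level.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('%(message)s'))

logger = logging.getLogger('eynollah')
logger.addHandler(console_handler)
logger.setLevel(logging.INFO)

logger.info("Step 1/5: Image Enhancement")  # prints the bare message to stdout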

View file

@@ -6,7 +6,6 @@
 document layout analysis (segmentation) with output in PAGE-XML
 """
-from logging import Logger
 from difflib import SequenceMatcher as sq
 from PIL import Image, ImageDraw, ImageFont
 import math
@@ -219,8 +218,9 @@ class Eynollah:
         threshold_art_class_layout: Optional[float] = None,
         threshold_art_class_textline: Optional[float] = None,
         skip_layout_and_reading_order : bool = False,
-        logger : Optional[Logger] = None,
     ):
+        self.logger = getLogger('eynollah')
         if skip_layout_and_reading_order:
             textline_light = True
         self.light_version = light_version
@@ -267,10 +267,6 @@ class Eynollah:
         else:
             self.threshold_art_class_textline = 0.1
-        self.logger = logger if logger else getLogger('eynollah')
-        # for parallelization of CPU-intensive tasks:
-        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
-        atexit.register(self.executor.shutdown)
         self.dir_models = dir_models
         self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
         self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
@@ -326,6 +322,13 @@ class Eynollah:
         else:
             self.model_table_dir = dir_models + "/eynollah-tables_20210319"
+        t_start = time.time()
+        # for parallelization of CPU-intensive tasks:
+        self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
+        atexit.register(self.executor.shutdown)
         # #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
         # #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
         # #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
@@ -340,6 +343,8 @@ class Eynollah:
         except:
             self.logger.warning("no GPU device available")
+        self.logger.info("Loading models...")
         self.model_page = self.our_load_model(self.model_page_dir)
         self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
         self.model_bin = self.our_load_model(self.model_dir_of_binarization)
@@ -393,6 +398,8 @@ class Eynollah:
         if self.tables:
             self.model_table = self.our_load_model(self.model_table_dir)
+        self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")
     def cache_images(self, image_filename=None, image_pil=None, dpi=None):
         ret = {}
         t_c0 = time.time()
@@ -4548,19 +4555,48 @@ class Eynollah:
             pcgts = self.run_single()
             self.logger.info("Job done in %.1fs", time.time() - t0)
-            #print("Job done in %.1fs" % (time.time() - t0))
             self.writer.write_pagexml(pcgts)
         if dir_in:
             self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
-            print("all Job done in %.1fs", time.time() - t0_tot)
     def run_single(self):
         t0 = time.time()
-        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
-        self.logger.info("Enhancing took %.1fs ", time.time() - t0)
+        self.logger.info(f"Processing file: {self.writer.image_filename}")
+        # Log enabled features directly
+        enabled_modes = []
+        if self.light_version:
+            enabled_modes.append("Light version")
+        if self.textline_light:
+            enabled_modes.append("Light textline detection")
+        if self.full_layout:
+            enabled_modes.append("Full layout analysis")
+        if self.ocr:
+            enabled_modes.append("OCR")
+        if self.tables:
+            enabled_modes.append("Table detection")
+        if enabled_modes:
+            self.logger.info("Enabled modes: " + ", ".join(enabled_modes))
+        self.logger.info("Step 1/5: Image Enhancement")
+        img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
+        self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns")
+        if is_image_enhanced:
+            self.logger.info("Enhancement applied")
+        self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
+        # Image Extraction Mode
         if self.extract_only_images:
+            self.logger.info("Step 2/5: Image Extraction Mode")
             text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
                 self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
             pcgts = self.writer.build_pagexml_no_full_layout(
@@ -4569,9 +4605,15 @@ class Eynollah:
                 cont_page, [], [])
             if self.plotter:
                 self.plotter.write_images_into_directory(polygons_of_images, image_page)
+            self.logger.info("Image extraction complete")
             return pcgts
+        # Basic Processing Mode
         if self.skip_layout_and_reading_order:
+            self.logger.info("Step 2/5: Basic Processing Mode")
+            self.logger.info("Skipping layout analysis and reading order detection")
             _ ,_, _, textline_mask_tot_ea, img_bin_light, _ = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier,
                     skip_layout_and_reading_order=self.skip_layout_and_reading_order)
@@ -4623,11 +4665,15 @@ class Eynollah:
                 all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
                 cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
+            self.logger.info("Basic processing complete")
             return pcgts
         #print("text region early -1 in %.1fs", time.time() - t0)
         t1 = time.time()
+        self.logger.info("Step 2/5: Layout Analysis")
         if self.light_version:
+            self.logger.info("Using light version processing")
             text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
                 self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
             #print("text region early -2 in %.1fs", time.time() - t0)
@@ -4658,19 +4704,22 @@ class Eynollah:
             text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \
                 self.get_regions_from_xy_2models(img_res, is_image_enhanced,
                                                  num_col_classifier)
-            self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
+            self.logger.info(f"Textregion detection took {time.time() - t1:.1f}s")
             confidence_matrix = np.zeros((text_regions_p_1.shape[:2]))
         t1 = time.time()
         num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
             text_regions_p_1, cont_page, table_prediction = \
             self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
-        self.logger.info("Graphics detection took %.1fs ", time.time() - t1)
+        self.logger.info(f"Graphics detection took {time.time() - t1:.1f}s")
         #self.logger.info('cont_page %s', cont_page)
         #plt.imshow(table_prediction)
         #plt.show()
+        self.logger.info(f"Layout analysis complete ({time.time() - t1:.1f}s)")
         if not num_col:
-            self.logger.info("No columns detected, outputting an empty PAGE-XML")
+            self.logger.info("No columns detected - generating empty PAGE-XML")
             pcgts = self.writer.build_pagexml_no_full_layout(
                 [], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [],
                 cont_page, [], [])
@@ -4680,10 +4729,12 @@ class Eynollah:
         t1 = time.time()
         if not self.light_version:
             textline_mask_tot_ea = self.run_textline(image_page)
-            self.logger.info("textline detection took %.1fs", time.time() - t1)
+            self.logger.info(f"Textline detection took {time.time() - t1:.1f}s")
             t1 = time.time()
             slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
-            self.logger.info("deskewing took %.1fs", time.time() - t1)
+            if np.abs(slope_deskew) > 0.01:  # Only log if there is significant skew
+                self.logger.info(f"Applied deskew correction: {slope_deskew:.2f} degrees")
+            self.logger.info(f"Deskewing took {time.time() - t1:.1f}s")
         elif num_col_classifier in (1,2):
             org_h_l_m = textline_mask_tot_ea.shape[0]
             org_w_l_m = textline_mask_tot_ea.shape[1]
@@ -4704,6 +4755,13 @@ class Eynollah:
             self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
                                num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
+        self.logger.info("Step 3/5: Text Line Detection")
+        if self.curved_line:
+            self.logger.info("Mode: Curved line detection")
+        elif self.textline_light:
+            self.logger.info("Mode: Light detection")
         if self.light_version and num_col_classifier in (1,2):
             image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
             textline_mask_tot_ea = resize_image(textline_mask_tot_ea,org_h_l_m, org_w_l_m )
@@ -4713,8 +4771,7 @@ class Eynollah:
             table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
             image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m )
-        self.logger.info("detection of marginals took %.1fs", time.time() - t1)
-        #print("text region early 2 marginal in %.1fs", time.time() - t0)
+        self.logger.info(f"Detection of marginals took {time.time() - t1:.1f}s")
         ## birdan sora chock chakir
         t1 = time.time()
         if not self.full_layout:
@@ -4812,7 +4869,7 @@ class Eynollah:
                     cx_bigest_d_big[0] = cx_bigest_d[ind_largest]
                     cy_biggest_d_big[0] = cy_biggest_d[ind_largest]
                 except Exception as why:
-                    self.logger.error(why)
+                    self.logger.error(str(why))
                 (h, w) = text_only.shape[:2]
                 center = (w // 2.0, h // 2.0)
@@ -5034,6 +5091,15 @@ class Eynollah:
         t_order = time.time()
         if self.full_layout:
+            self.logger.info("Step 4/5: Reading Order Detection")
+            if self.reading_order_machine_based:
+                self.logger.info("Using machine-based detection")
+            if self.right2left:
+                self.logger.info("Right-to-left mode enabled")
+            if self.headers_off:
+                self.logger.info("Headers ignored in reading order")
             if self.reading_order_machine_based:
                 tror = time.time()
                 order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
@@ -5045,9 +5111,16 @@ class Eynollah:
             else:
                 order_text_new, id_of_texts_tot = self.do_order_of_regions(
                     contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
-            self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+            self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
             if self.ocr and not self.tr:
+                self.logger.info("Step 4.5/5: OCR Processing")
+                if torch.cuda.is_available():
+                    self.logger.info("Using GPU acceleration")
+                else:
+                    self.logger.info("Using CPU processing")
                 gc.collect()
                 if len(all_found_textline_polygons)>0:
                     ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
@@ -5079,15 +5152,54 @@ class Eynollah:
                 ocr_all_textlines_marginals_right = None
                 ocr_all_textlines_h = None
                 ocr_all_textlines_drop = None
+            self.logger.info("Step 5/5: Output Generation")
+            output_config = []
+            if self.enable_plotting:
+                output_config.append("Saving debug plots")
+            if self.dir_of_cropped_images:
+                output_config.append(f"Saving cropped images to: {self.dir_of_cropped_images}")
+            if self.dir_of_layout:
+                output_config.append(f"Saving layout plots to: {self.dir_of_layout}")
+            if self.dir_of_deskewed:
+                output_config.append(f"Saving deskewed images to: {self.dir_of_deskewed}")
+            if output_config:
+                self.logger.info("Output configuration:\n * %s", "\n * ".join(output_config))
             pcgts = self.writer.build_pagexml_full_layout(
                 contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
                 all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
                 polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right,
                 all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
                 cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h)
+            summary = [
+                f"Total processing time: {time.time() - t0:.1f}s",
+                f"Output file: {self.writer.output_filename}"
+            ]
+            if self.ocr:
+                summary.append("OCR processing completed")
+            if self.full_layout:
+                summary.append("Full layout analysis completed")
+            if self.tables:
+                summary.append("Table detection completed")
+            self.logger.info(f"Summary: {summary}")
             return pcgts
         contours_only_text_parent_h = None
+        self.logger.info("Step 4/5: Reading Order Detection")
+        if self.reading_order_machine_based:
+            self.logger.info("Using machine-based detection")
+        if self.right2left:
+            self.logger.info("Right-to-left mode enabled")
+        if self.headers_off:
+            self.logger.info("Headers ignored in reading order")
         if self.reading_order_machine_based:
             order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
                 contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
@@ -5108,6 +5220,21 @@ class Eynollah:
                     contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
         if self.ocr and self.tr:
+            self.logger.info("Step 4.5/5: OCR Processing")
+            if torch.cuda.is_available():
+                self.logger.info("Using GPU acceleration")
+            else:
+                self.logger.info("Using CPU processing")
+            if self.light_version:
+                self.logger.info("Using light version OCR")
+            if self.textline_light:
+                self.logger.info("Using light text line detection for OCR")
+            self.logger.info("Processing text lines...")
             device = cuda.get_current_device()
             device.reset()
             gc.collect()
@@ -5170,13 +5297,32 @@ class Eynollah:
             ocr_all_textlines = None
             ocr_all_textlines_marginals_left = None
             ocr_all_textlines_marginals_right = None
-        self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
+        self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
+        self.logger.info("Step 5/5: Output Generation")
+        self.logger.info("Generating PAGE-XML output")
+        if self.enable_plotting:
+            self.logger.info("Saving debug plots")
+        if self.dir_of_cropped_images:
+            self.logger.info(f"Saving cropped images to: {self.dir_of_cropped_images}")
+        if self.dir_of_layout:
+            self.logger.info(f"Saving layout plots to: {self.dir_of_layout}")
+        if self.dir_of_deskewed:
+            self.logger.info(f"Saving deskewed images to: {self.dir_of_deskewed}")
         pcgts = self.writer.build_pagexml_no_full_layout(
             txt_con_org, page_coord, order_text_new, id_of_texts_tot,
             all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
             all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
             cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
+        self.logger.info(f"Output file: {self.writer.output_filename}")
         return pcgts
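Because the Eynollah constructor no longer accepts a logger argument, callers that previously passed logger=... now configure the shared named logger instead; a minimal sketch, assuming only the standard library:

import logging

# Configure the 'eynollah' logger before constructing Eynollah; the class
# now always obtains it via getLogger('eynollah') internally.
logging.basicConfig(format='%(levelname)s:%(name)s: %(message)s')
logging.getLogger('eynollah').setLevel(logging.DEBUG)  # or INFO / WARNING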