mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-06 14:39:55 +02:00
Merge remote-tracking branch 'michalbubula/add-feedback' into prepare-release-v0.5.0
This commit is contained in:
commit
9ead58b99a
3 changed files with 187 additions and 25 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -5,3 +5,4 @@ models_eynollah*
|
|||
output.html
|
||||
/build
|
||||
/dist
|
||||
*.tif
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import sys
|
||||
import click
|
||||
import logging
|
||||
from ocrd_utils import initLogging, getLevelName, getLogger
|
||||
from eynollah.eynollah import Eynollah, Eynollah_ocr
|
||||
from eynollah.sbb_binarize import SbbBinarizer
|
||||
|
@ -335,15 +336,30 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
|
|||
is_flag=True,
|
||||
help="if this parameter set to true, this tool will ignore layout detection and reading order. It means that textline detection will be done within printspace and contours of textline will be written in xml output file.",
|
||||
)
|
||||
# TODO move to top-level CLI context
|
||||
@click.option(
|
||||
"--log_level",
|
||||
"-l",
|
||||
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
|
||||
help="Override log level globally to this",
|
||||
help="Override 'eynollah' log level globally to this",
|
||||
)
|
||||
#
|
||||
@click.option(
|
||||
"--setup-logging",
|
||||
is_flag=True,
|
||||
help="Setup a basic console logger",
|
||||
)
|
||||
|
||||
def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level):
|
||||
initLogging()
|
||||
def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
|
||||
if setup_logging:
|
||||
console_handler = logging.StreamHandler(sys.stdout)
|
||||
console_handler.setLevel(logging.INFO)
|
||||
formatter = logging.Formatter('%(message)s')
|
||||
console_handler.setFormatter(formatter)
|
||||
getLogger('eynollah').addHandler(console_handler)
|
||||
getLogger('eynollah').setLevel(logging.INFO)
|
||||
else:
|
||||
initLogging()
|
||||
if log_level:
|
||||
getLogger('eynollah').setLevel(getLevelName(log_level))
|
||||
assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
|
||||
|
@ -367,7 +383,6 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
|
|||
assert image or dir_in, "Either a single image -i or a dir_in -di is required"
|
||||
eynollah = Eynollah(
|
||||
model,
|
||||
logger=getLogger('eynollah'),
|
||||
dir_out=out,
|
||||
dir_of_cropped_images=save_images,
|
||||
extract_only_images=extract_only_images,
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
document layout analysis (segmentation) with output in PAGE-XML
|
||||
"""
|
||||
|
||||
from logging import Logger
|
||||
from difflib import SequenceMatcher as sq
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
import math
|
||||
|
@ -219,8 +218,9 @@ class Eynollah:
|
|||
threshold_art_class_layout: Optional[float] = None,
|
||||
threshold_art_class_textline: Optional[float] = None,
|
||||
skip_layout_and_reading_order : bool = False,
|
||||
logger : Optional[Logger] = None,
|
||||
):
|
||||
self.logger = getLogger('eynollah')
|
||||
|
||||
if skip_layout_and_reading_order:
|
||||
textline_light = True
|
||||
self.light_version = light_version
|
||||
|
@ -267,10 +267,6 @@ class Eynollah:
|
|||
else:
|
||||
self.threshold_art_class_textline = 0.1
|
||||
|
||||
self.logger = logger if logger else getLogger('eynollah')
|
||||
# for parallelization of CPU-intensive tasks:
|
||||
self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
|
||||
atexit.register(self.executor.shutdown)
|
||||
self.dir_models = dir_models
|
||||
self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
|
||||
self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
|
||||
|
@ -325,7 +321,14 @@ class Eynollah:
|
|||
self.model_table_dir = dir_models + "/modelens_table_0t4_201124"
|
||||
else:
|
||||
self.model_table_dir = dir_models + "/eynollah-tables_20210319"
|
||||
|
||||
|
||||
t_start = time.time()
|
||||
|
||||
# for parallelization of CPU-intensive tasks:
|
||||
self.executor = ProcessPoolExecutor(max_workers=cpu_count(), timeout=1200)
|
||||
atexit.register(self.executor.shutdown)
|
||||
|
||||
# #gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
|
||||
# #gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=7.7, allow_growth=True)
|
||||
# #session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
|
||||
|
@ -339,7 +342,9 @@ class Eynollah:
|
|||
tf.config.experimental.set_memory_growth(device, True)
|
||||
except:
|
||||
self.logger.warning("no GPU device available")
|
||||
|
||||
|
||||
self.logger.info("Loading models...")
|
||||
|
||||
self.model_page = self.our_load_model(self.model_page_dir)
|
||||
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
|
||||
self.model_bin = self.our_load_model(self.model_dir_of_binarization)
|
||||
|
@ -392,6 +397,8 @@ class Eynollah:
|
|||
|
||||
if self.tables:
|
||||
self.model_table = self.our_load_model(self.model_table_dir)
|
||||
|
||||
self.logger.info(f"Model initialization complete ({time.time() - t_start:.1f}s)")
|
||||
|
||||
def cache_images(self, image_filename=None, image_pil=None, dpi=None):
|
||||
ret = {}
|
||||
|
@ -4548,19 +4555,48 @@ class Eynollah:
|
|||
|
||||
pcgts = self.run_single()
|
||||
self.logger.info("Job done in %.1fs", time.time() - t0)
|
||||
#print("Job done in %.1fs" % (time.time() - t0))
|
||||
self.writer.write_pagexml(pcgts)
|
||||
|
||||
if dir_in:
|
||||
self.logger.info("All jobs done in %.1fs", time.time() - t0_tot)
|
||||
print("all Job done in %.1fs", time.time() - t0_tot)
|
||||
|
||||
def run_single(self):
|
||||
t0 = time.time()
|
||||
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
|
||||
self.logger.info("Enhancing took %.1fs ", time.time() - t0)
|
||||
|
||||
self.logger.info(f"Processing file: {self.writer.image_filename}")
|
||||
|
||||
# Log enabled features directly
|
||||
enabled_modes = []
|
||||
if self.light_version:
|
||||
enabled_modes.append("Light version")
|
||||
if self.textline_light:
|
||||
enabled_modes.append("Light textline detection")
|
||||
if self.full_layout:
|
||||
enabled_modes.append("Full layout analysis")
|
||||
if self.ocr:
|
||||
enabled_modes.append("OCR")
|
||||
if self.tables:
|
||||
enabled_modes.append("Table detection")
|
||||
|
||||
if enabled_modes:
|
||||
self.logger.info("Enabled modes: " + ", ".join(enabled_modes))
|
||||
|
||||
|
||||
self.logger.info("Step 1/5: Image Enhancement")
|
||||
|
||||
img_res, is_image_enhanced, num_col_classifier, num_column_is_classified = self.run_enhancement(self.light_version)
|
||||
|
||||
self.logger.info(f"Image: {self.image.shape[1]}x{self.image.shape[0]}, {self.dpi} DPI, {num_col_classifier} columns")
|
||||
if is_image_enhanced:
|
||||
self.logger.info("Enhancement applied")
|
||||
|
||||
self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
|
||||
|
||||
|
||||
# Image Extraction Mode
|
||||
if self.extract_only_images:
|
||||
self.logger.info("Step 2/5: Image Extraction Mode")
|
||||
|
||||
text_regions_p_1, erosion_hurts, polygons_lines_xml, polygons_of_images, image_page, page_coord, cont_page = \
|
||||
self.get_regions_light_v_extract_only_images(img_res, is_image_enhanced, num_col_classifier)
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(
|
||||
|
@ -4569,9 +4605,15 @@ class Eynollah:
|
|||
cont_page, [], [])
|
||||
if self.plotter:
|
||||
self.plotter.write_images_into_directory(polygons_of_images, image_page)
|
||||
|
||||
self.logger.info("Image extraction complete")
|
||||
return pcgts
|
||||
|
||||
# Basic Processing Mode
|
||||
if self.skip_layout_and_reading_order:
|
||||
self.logger.info("Step 2/5: Basic Processing Mode")
|
||||
self.logger.info("Skipping layout analysis and reading order detection")
|
||||
|
||||
_ ,_, _, textline_mask_tot_ea, img_bin_light, _ = \
|
||||
self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier,
|
||||
skip_layout_and_reading_order=self.skip_layout_and_reading_order)
|
||||
|
@ -4623,11 +4665,15 @@ class Eynollah:
|
|||
all_found_textline_polygons, page_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines=ocr_all_textlines, conf_contours_textregion=conf_contours_textregions, skip_layout_reading_order=self.skip_layout_and_reading_order)
|
||||
self.logger.info("Basic processing complete")
|
||||
return pcgts
|
||||
|
||||
#print("text region early -1 in %.1fs", time.time() - t0)
|
||||
t1 = time.time()
|
||||
self.logger.info("Step 2/5: Layout Analysis")
|
||||
|
||||
if self.light_version:
|
||||
self.logger.info("Using light version processing")
|
||||
text_regions_p_1 ,erosion_hurts, polygons_lines_xml, textline_mask_tot_ea, img_bin_light, confidence_matrix = \
|
||||
self.get_regions_light_v(img_res, is_image_enhanced, num_col_classifier)
|
||||
#print("text region early -2 in %.1fs", time.time() - t0)
|
||||
|
@ -4658,19 +4704,22 @@ class Eynollah:
|
|||
text_regions_p_1 ,erosion_hurts, polygons_lines_xml = \
|
||||
self.get_regions_from_xy_2models(img_res, is_image_enhanced,
|
||||
num_col_classifier)
|
||||
self.logger.info("Textregion detection took %.1fs ", time.time() - t1)
|
||||
self.logger.info(f"Textregion detection took {time.time() - t1:.1f}s")
|
||||
confidence_matrix = np.zeros((text_regions_p_1.shape[:2]))
|
||||
|
||||
t1 = time.time()
|
||||
num_col, num_col_classifier, img_only_regions, page_coord, image_page, mask_images, mask_lines, \
|
||||
text_regions_p_1, cont_page, table_prediction = \
|
||||
self.run_graphics_and_columns(text_regions_p_1, num_col_classifier, num_column_is_classified, erosion_hurts)
|
||||
self.logger.info("Graphics detection took %.1fs ", time.time() - t1)
|
||||
self.logger.info(f"Graphics detection took {time.time() - t1:.1f}s")
|
||||
#self.logger.info('cont_page %s', cont_page)
|
||||
#plt.imshow(table_prediction)
|
||||
#plt.show()
|
||||
self.logger.info(f"Layout analysis complete ({time.time() - t1:.1f}s)")
|
||||
|
||||
if not num_col:
|
||||
self.logger.info("No columns detected, outputting an empty PAGE-XML")
|
||||
self.logger.info("No columns detected - generating empty PAGE-XML")
|
||||
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(
|
||||
[], page_coord, [], [], [], [], [], [], [], [], [], [], [], [], [], [],
|
||||
cont_page, [], [])
|
||||
|
@ -4680,10 +4729,12 @@ class Eynollah:
|
|||
t1 = time.time()
|
||||
if not self.light_version:
|
||||
textline_mask_tot_ea = self.run_textline(image_page)
|
||||
self.logger.info("textline detection took %.1fs", time.time() - t1)
|
||||
self.logger.info(f"Textline detection took {time.time() - t1:.1f}s")
|
||||
t1 = time.time()
|
||||
slope_deskew, slope_first = self.run_deskew(textline_mask_tot_ea)
|
||||
self.logger.info("deskewing took %.1fs", time.time() - t1)
|
||||
if np.abs(slope_deskew) > 0.01: # Only log if there is significant skew
|
||||
self.logger.info(f"Applied deskew correction: {slope_deskew:.2f} degrees")
|
||||
self.logger.info(f"Deskewing took {time.time() - t1:.1f}s")
|
||||
elif num_col_classifier in (1,2):
|
||||
org_h_l_m = textline_mask_tot_ea.shape[0]
|
||||
org_w_l_m = textline_mask_tot_ea.shape[1]
|
||||
|
@ -4703,6 +4754,13 @@ class Eynollah:
|
|||
textline_mask_tot, text_regions_p, image_page_rotated = \
|
||||
self.run_marginals(image_page, textline_mask_tot_ea, mask_images, mask_lines,
|
||||
num_col_classifier, slope_deskew, text_regions_p_1, table_prediction)
|
||||
|
||||
self.logger.info("Step 3/5: Text Line Detection")
|
||||
|
||||
if self.curved_line:
|
||||
self.logger.info("Mode: Curved line detection")
|
||||
elif self.textline_light:
|
||||
self.logger.info("Mode: Light detection")
|
||||
|
||||
if self.light_version and num_col_classifier in (1,2):
|
||||
image_page = resize_image(image_page,org_h_l_m, org_w_l_m )
|
||||
|
@ -4713,8 +4771,7 @@ class Eynollah:
|
|||
table_prediction = resize_image(table_prediction,org_h_l_m, org_w_l_m )
|
||||
image_page_rotated = resize_image(image_page_rotated,org_h_l_m, org_w_l_m )
|
||||
|
||||
self.logger.info("detection of marginals took %.1fs", time.time() - t1)
|
||||
#print("text region early 2 marginal in %.1fs", time.time() - t0)
|
||||
self.logger.info(f"Detection of marginals took {time.time() - t1:.1f}s")
|
||||
## birdan sora chock chakir
|
||||
t1 = time.time()
|
||||
if not self.full_layout:
|
||||
|
@ -4812,7 +4869,7 @@ class Eynollah:
|
|||
cx_bigest_d_big[0] = cx_bigest_d[ind_largest]
|
||||
cy_biggest_d_big[0] = cy_biggest_d[ind_largest]
|
||||
except Exception as why:
|
||||
self.logger.error(why)
|
||||
self.logger.error(str(why))
|
||||
|
||||
(h, w) = text_only.shape[:2]
|
||||
center = (w // 2.0, h // 2.0)
|
||||
|
@ -5034,6 +5091,15 @@ class Eynollah:
|
|||
t_order = time.time()
|
||||
|
||||
if self.full_layout:
|
||||
self.logger.info("Step 4/5: Reading Order Detection")
|
||||
|
||||
if self.reading_order_machine_based:
|
||||
self.logger.info("Using machine-based detection")
|
||||
if self.right2left:
|
||||
self.logger.info("Right-to-left mode enabled")
|
||||
if self.headers_off:
|
||||
self.logger.info("Headers ignored in reading order")
|
||||
|
||||
if self.reading_order_machine_based:
|
||||
tror = time.time()
|
||||
order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
|
||||
|
@ -5045,9 +5111,16 @@ class Eynollah:
|
|||
else:
|
||||
order_text_new, id_of_texts_tot = self.do_order_of_regions(
|
||||
contours_only_text_parent_d_ordered, contours_only_text_parent_h_d_ordered, boxes_d, textline_mask_tot_d)
|
||||
self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
|
||||
self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
|
||||
|
||||
if self.ocr and not self.tr:
|
||||
self.logger.info("Step 4.5/5: OCR Processing")
|
||||
|
||||
if torch.cuda.is_available():
|
||||
self.logger.info("Using GPU acceleration")
|
||||
else:
|
||||
self.logger.info("Using CPU processing")
|
||||
|
||||
gc.collect()
|
||||
if len(all_found_textline_polygons)>0:
|
||||
ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(image_page, all_found_textline_polygons, self.prediction_model, self.b_s_ocr, self.num_to_char, self.textline_light, self.curved_line)
|
||||
|
@ -5079,15 +5152,54 @@ class Eynollah:
|
|||
ocr_all_textlines_marginals_right = None
|
||||
ocr_all_textlines_h = None
|
||||
ocr_all_textlines_drop = None
|
||||
|
||||
self.logger.info("Step 5/5: Output Generation")
|
||||
|
||||
output_config = []
|
||||
if self.enable_plotting:
|
||||
output_config.append("Saving debug plots")
|
||||
if self.dir_of_cropped_images:
|
||||
output_config.append(f"Saving cropped images to: {self.dir_of_cropped_images}")
|
||||
if self.dir_of_layout:
|
||||
output_config.append(f"Saving layout plots to: {self.dir_of_layout}")
|
||||
if self.dir_of_deskewed:
|
||||
output_config.append(f"Saving deskewed images to: {self.dir_of_deskewed}")
|
||||
|
||||
if output_config:
|
||||
self.logger.info("Output configuration:\n * %s", "\n * ".join(output_config))
|
||||
|
||||
pcgts = self.writer.build_pagexml_full_layout(
|
||||
contours_only_text_parent, contours_only_text_parent_h, page_coord, order_text_new, id_of_texts_tot,
|
||||
all_found_textline_polygons, all_found_textline_polygons_h, all_box_coord, all_box_coord_h,
|
||||
polygons_of_images, contours_tables, polygons_of_drop_capitals, polygons_of_marginals_left, polygons_of_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_lines_xml, ocr_all_textlines, ocr_all_textlines_h, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, ocr_all_textlines_drop, conf_contours_textregions, conf_contours_textregions_h)
|
||||
|
||||
summary = [
|
||||
f"Total processing time: {time.time() - t0:.1f}s",
|
||||
f"Output file: {self.writer.output_filename}"
|
||||
]
|
||||
|
||||
if self.ocr:
|
||||
summary.append("OCR processing completed")
|
||||
if self.full_layout:
|
||||
summary.append("Full layout analysis completed")
|
||||
if self.tables:
|
||||
summary.append("Table detection completed")
|
||||
self.logger.info(f"Summary: {summary}")
|
||||
|
||||
return pcgts
|
||||
|
||||
contours_only_text_parent_h = None
|
||||
self.logger.info("Step 4/5: Reading Order Detection")
|
||||
|
||||
if self.reading_order_machine_based:
|
||||
self.logger.info("Using machine-based detection")
|
||||
if self.right2left:
|
||||
self.logger.info("Right-to-left mode enabled")
|
||||
if self.headers_off:
|
||||
self.logger.info("Headers ignored in reading order")
|
||||
|
||||
if self.reading_order_machine_based:
|
||||
order_text_new, id_of_texts_tot = self.do_order_of_regions_with_model(
|
||||
contours_only_text_parent, contours_only_text_parent_h, text_regions_p)
|
||||
|
@ -5108,6 +5220,21 @@ class Eynollah:
|
|||
contours_only_text_parent_d_ordered, contours_only_text_parent_h, boxes_d, textline_mask_tot_d)
|
||||
|
||||
if self.ocr and self.tr:
|
||||
self.logger.info("Step 4.5/5: OCR Processing")
|
||||
|
||||
if torch.cuda.is_available():
|
||||
self.logger.info("Using GPU acceleration")
|
||||
else:
|
||||
self.logger.info("Using CPU processing")
|
||||
|
||||
if self.light_version:
|
||||
self.logger.info("Using light version OCR")
|
||||
|
||||
if self.textline_light:
|
||||
self.logger.info("Using light text line detection for OCR")
|
||||
|
||||
self.logger.info("Processing text lines...")
|
||||
|
||||
device = cuda.get_current_device()
|
||||
device.reset()
|
||||
gc.collect()
|
||||
|
@ -5170,13 +5297,32 @@ class Eynollah:
|
|||
ocr_all_textlines = None
|
||||
ocr_all_textlines_marginals_left = None
|
||||
ocr_all_textlines_marginals_right = None
|
||||
self.logger.info("detection of reading order took %.1fs", time.time() - t_order)
|
||||
self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")
|
||||
|
||||
self.logger.info("Step 5/5: Output Generation")
|
||||
|
||||
self.logger.info("Generating PAGE-XML output")
|
||||
|
||||
if self.enable_plotting:
|
||||
self.logger.info("Saving debug plots")
|
||||
|
||||
if self.dir_of_cropped_images:
|
||||
self.logger.info(f"Saving cropped images to: {self.dir_of_cropped_images}")
|
||||
|
||||
if self.dir_of_layout:
|
||||
self.logger.info(f"Saving layout plots to: {self.dir_of_layout}")
|
||||
|
||||
if self.dir_of_deskewed:
|
||||
self.logger.info(f"Saving deskewed images to: {self.dir_of_deskewed}")
|
||||
|
||||
pcgts = self.writer.build_pagexml_no_full_layout(
|
||||
txt_con_org, page_coord, order_text_new, id_of_texts_tot,
|
||||
all_found_textline_polygons, all_box_coord, polygons_of_images, polygons_of_marginals_left, polygons_of_marginals_right,
|
||||
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right, all_box_coord_marginals_left, all_box_coord_marginals_right, slopes, slopes_marginals_left, slopes_marginals_right,
|
||||
cont_page, polygons_lines_xml, contours_tables, ocr_all_textlines, ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right, conf_contours_textregions)
|
||||
|
||||
self.logger.info(f"Output file: {self.writer.output_filename}")
|
||||
|
||||
return pcgts
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue