commit 7cf6ae1d7a
Konstantin Baierer, 2025-10-22 15:05:46 +00:00, committed by GitHub
22 changed files with 1962 additions and 1341 deletions


@@ -6,21 +6,23 @@ EXTRAS ?=
 DOCKER_BASE_IMAGE ?= docker.io/ocrd/core-cuda-tf2:latest
 DOCKER_TAG ?= ocrd/eynollah
 DOCKER ?= docker
+WGET = wget -O

 #SEG_MODEL := https://qurator-data.de/eynollah/2021-04-25/models_eynollah.tar.gz
 #SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah_renamed.tar.gz
 # SEG_MODEL := https://qurator-data.de/eynollah/2022-04-05/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.0/models_eynollah.tar.gz
 #SEG_MODEL := https://github.com/qurator-spk/eynollah/releases/download/v0.3.1/models_eynollah.tar.gz
-SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+#SEG_MODEL := https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1
+SEG_MODEL := https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1
 SEG_MODELFILE = $(notdir $(patsubst %?download=1,%,$(SEG_MODEL)))
 SEG_MODELNAME = $(SEG_MODELFILE:%.tar.gz=%)

-BIN_MODEL := https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip
+BIN_MODEL := https://zenodo.org/records/17295988/files/models_binarization_v0_6_0.tar.gz?download=1
 BIN_MODELFILE = $(notdir $(BIN_MODEL))
 BIN_MODELNAME := default-2021-03-09

-OCR_MODEL := https://zenodo.org/records/17236998/files/models_ocr_v0_5_1.tar.gz?download=1
+OCR_MODEL := https://zenodo.org/records/17295988/files/models_ocr_v0_6_0.tar.gz?download=1
 OCR_MODELFILE = $(notdir $(patsubst %?download=1,%,$(OCR_MODEL)))
 OCR_MODELNAME = $(OCR_MODELFILE:%.tar.gz=%)
@@ -55,22 +57,21 @@ help:
 # END-EVAL

-# Download and extract models to $(PWD)/models_layout_v0_5_0
+# Download and extract models to $(PWD)/models_layout_v0_6_0
 models: $(BIN_MODELNAME) $(SEG_MODELNAME) $(OCR_MODELNAME)

 # do not download these files if we already have the directories
 .INTERMEDIATE: $(BIN_MODELFILE) $(SEG_MODELFILE) $(OCR_MODELFILE)

 $(BIN_MODELFILE):
-	wget -O $@ $(BIN_MODEL)
+	$(WGET) $@ $(BIN_MODEL)
 $(SEG_MODELFILE):
-	wget -O $@ $(SEG_MODEL)
+	$(WGET) $@ $(SEG_MODEL)
 $(OCR_MODELFILE):
-	wget -O $@ $(OCR_MODEL)
+	$(WGET) $@ $(OCR_MODEL)

 $(BIN_MODELNAME): $(BIN_MODELFILE)
-	mkdir $@
-	unzip -d $@ $<
+	tar zxf $<
 $(SEG_MODELNAME): $(SEG_MODELFILE)
 	tar zxf $<
 $(OCR_MODELNAME): $(OCR_MODELFILE)
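
For readers less versed in GNU make string functions, the name derivation above can be read as the following Python sketch (a rough equivalent for illustration, not part of the commit; str.removesuffix needs Python 3.9+):

    from posixpath import basename

    # $(notdir $(patsubst %?download=1,%,$(SEG_MODEL))) ...
    SEG_MODEL = "https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1"
    seg_modelfile = basename(SEG_MODEL.removesuffix("?download=1"))  # -> models_layout_v0_6_0.tar.gz
    # ... and $(SEG_MODELFILE:%.tar.gz=%)
    seg_modelname = seg_modelfile.removesuffix(".tar.gz")            # -> models_layout_v0_6_0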


@@ -55,7 +55,7 @@ make install EXTRAS=OCR
 ## Models

-Pretrained models can be downloaded from [zenodo](https://zenodo.org/records/17194824) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).
+Pretrained models can be downloaded from [zenodo](https://doi.org/10.5281/zenodo.17194823) or [huggingface](https://huggingface.co/SBB?search_models=eynollah).

 For documentation on models, have a look at [`models.md`](https://github.com/qurator-spk/eynollah/tree/main/docs/models.md).
 Model cards are also provided for our trained models.
@@ -162,7 +162,7 @@ formally described in [`ocrd-tool.json`](https://github.com/qurator-spk/eynollah
 In this case, the source image file group with (preferably) RGB images should be used as input like this:

-    ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_5_0
+    ocrd-eynollah-segment -I OCR-D-IMG -O OCR-D-SEG -P models eynollah_layout_v0_6_0

 If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynollah behaves as follows:
 - existing regions are kept and ignored (i.e. in effect they might overlap segments from Eynollah results)
@@ -174,7 +174,7 @@ If the input file group is PAGE-XML (from a previous OCR-D workflow step), Eynol
 (because some other preprocessing step was in effect like `denoised`), then
 the output PAGE-XML will be based on that as new top-level (`@imageFilename`)

-    ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_5_0
+    ocrd-eynollah-segment -I OCR-D-XYZ -O OCR-D-SEG -P models eynollah_layout_v0_6_0

 In general, it makes more sense to add other workflow steps **after** Eynollah.


@@ -6,3 +6,4 @@ tensorflow < 2.13
 numba <= 0.58.1
 scikit-image
 biopython
+tabulate


@@ -1,16 +1,24 @@
+from dataclasses import dataclass
 import sys
 import click
 import logging
+from typing import Tuple, List
 from ocrd_utils import initLogging, getLevelName, getLogger
-from eynollah.eynollah import Eynollah, Eynollah_ocr
+from eynollah.eynollah import Eynollah
+from eynollah.eynollah_ocr import Eynollah_ocr
 from eynollah.sbb_binarize import SbbBinarizer
 from eynollah.image_enhancer import Enhancer
 from eynollah.mb_ro_on_layout import machine_based_reading_order_on_layout
+from eynollah.model_zoo import EynollahModelZoo
+from .cli_models import models_cli

 @click.group()
 def main():
     pass

+main.add_command(models_cli, 'models')
+
 @main.command()
 @click.option(
     "--input",
@@ -79,18 +87,38 @@ def machine_based_reading_order(input, dir_in, out, model, log_level):
     type=click.Path(file_okay=True, dir_okay=True),
     required=True,
 )
+@click.option(
+    '-M',
+    '--mode',
+    type=click.Choice(['single', 'multi']),
+    default='single',
+    help="Whether to use the (faster) single-model binarization or the (slightly better) multi-model binarization"
+)
 @click.option(
     "--log_level",
     "-l",
     type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
     help="Override log level globally to this",
 )
-def binarization(patches, model_dir, input_image, dir_in, output, log_level):
+def binarization(
+    patches,
+    model_dir,
+    input_image,
+    mode,
+    dir_in,
+    output,
+    log_level,
+):
     assert bool(input_image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
-    binarizer = SbbBinarizer(model_dir)
+    binarizer = SbbBinarizer(model_dir, mode=mode)
     if log_level:
         binarizer.log.setLevel(getLevelName(log_level))
-    binarizer.run(image_path=input_image, use_patches=patches, output=output, dir_in=dir_in)
+    binarizer.run(
+        image_path=input_image,
+        use_patches=patches,
+        output=output,
+        dir_in=dir_in
+    )

 @main.command()
@@ -198,15 +226,17 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
 @click.option(
     "--model",
     "-m",
+    'model_basedir',
     help="directory of models",
     type=click.Path(exists=True, file_okay=False),
+    # default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
     required=True,
 )
 @click.option(
     "--model_version",
     "-mv",
-    help="override default versions of model categories",
-    type=(str, str),
+    help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
+    type=(str, str, str),
     multiple=True,
 )
 @click.option(
@@ -380,7 +410,43 @@ def enhancement(image, out, overwrite, dir_in, model, num_col_upper, num_col_low
     help="Setup a basic console logger",
 )
-def layout(image, out, overwrite, dir_in, model, model_version, save_images, save_layout, save_deskewed, save_all, extract_only_images, save_page, enable_plotting, allow_enhancement, curved_line, textline_light, full_layout, tables, right2left, input_binary, allow_scaling, headers_off, light_version, reading_order_machine_based, do_ocr, transformer_ocr, batch_size_ocr, num_col_upper, num_col_lower, threshold_art_class_textline, threshold_art_class_layout, skip_layout_and_reading_order, ignore_page_extraction, log_level, setup_logging):
+def layout(
+    image,
+    out,
+    overwrite,
+    dir_in,
+    model_basedir,
+    model_version,
+    save_images,
+    save_layout,
+    save_deskewed,
+    save_all,
+    extract_only_images,
+    save_page,
+    enable_plotting,
+    allow_enhancement,
+    curved_line,
+    textline_light,
+    full_layout,
+    tables,
+    right2left,
+    input_binary,
+    allow_scaling,
+    headers_off,
+    light_version,
+    reading_order_machine_based,
+    do_ocr,
+    transformer_ocr,
+    batch_size_ocr,
+    num_col_upper,
+    num_col_lower,
+    threshold_art_class_textline,
+    threshold_art_class_layout,
+    skip_layout_and_reading_order,
+    ignore_page_extraction,
+    log_level,
+    setup_logging,
+):
     if setup_logging:
         console_handler = logging.StreamHandler(sys.stdout)
         console_handler.setLevel(logging.INFO)
@@ -410,8 +476,8 @@ def layout(image, out, overwrite, dir_in, model, model_version, save_images, sav
     assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
     assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
     eynollah = Eynollah(
-        model,
-        model_versions=model_version,
+        model_basedir,
+        model_overrides=model_version,
         extract_only_images=extract_only_images,
         enable_plotting=enable_plotting,
         allow_enhancement=allow_enhancement,
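
The user-visible upshot of this hunk: `binarization` gains a `-M`/`--mode` choice, and the `layout` override `-mv` now takes a CATEGORY VARIANT PATH triple that is passed to `Eynollah` as `model_overrides`. A minimal sketch via click's test runner; only `-M`, `-m` and `-mv` appear in this hunk, so the input/output flags and all paths are placeholder assumptions:

    from click.testing import CliRunner
    from eynollah.cli import main

    runner = CliRunner()
    # multi-model binarization through the new -M/--mode switch
    runner.invoke(main, ["binarization", "-M", "multi", "-m", "/path/to/models",
                         "-i", "page.png", "-o", "page.bin.png"])
    # override one model category: CATEGORY VARIANT PATH
    runner.invoke(main, ["layout", "-m", "/path/to/models",
                         "-mv", "region", "light", "/path/to/model",
                         "-i", "page.png", "-o", "out/"])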


@@ -0,0 +1,93 @@
from dataclasses import dataclass
from pathlib import Path
from typing import List, Set, Tuple
import click
from eynollah.model_zoo.default_specs import MODELS_VERSION
from .model_zoo import EynollahModelZoo
@dataclass()
class EynollahCliCtx:
model_zoo: EynollahModelZoo
@click.group()
@click.pass_context
@click.option(
"--model",
"-m",
'model_basedir',
help="directory of models",
type=click.Path(exists=True, file_okay=False),
# default=f"{os.environ['HOME']}/.local/share/ocrd-resources/ocrd-eynollah-segment",
required=True,
)
@click.option(
"--model-overrides",
"-mv",
help="override default versions of model categories, syntax is 'CATEGORY VARIANT PATH', e.g 'region light /path/to/model'. See eynollah list-models for the full list",
type=(str, str, str),
multiple=True,
)
def models_cli(
ctx,
model_basedir: str,
model_overrides: List[Tuple[str, str, str]],
):
"""
Organize models for the various runners in eynollah.
"""
ctx.obj = EynollahCliCtx(model_zoo=EynollahModelZoo(basedir=model_basedir, model_overrides=model_overrides))
@models_cli.command('list')
@click.pass_context
def list_models(
ctx,
):
"""
List all the models in the zoo
"""
print(ctx.obj.model_zoo)
@models_cli.command('package')
@click.option(
'--set-version', '-V', 'version', help="Version to use for packaging", default=MODELS_VERSION, show_default=True
)
@click.argument('output_dir')
@click.pass_context
def package(
ctx,
version,
output_dir,
):
"""
Generate shell code to copy all the models in the zoo into properly named folders in OUTPUT_DIR for distribution.
eynollah models -m SRC package OUTPUT_DIR
SRC should contain a directory "models_eynollah" containing all the models.
"""
mkdirs: Set[Path] = set([])
copies: Set[Tuple[Path, Path]] = set([])
for spec in ctx.obj.model_zoo.specs.specs:
# skip these as they are dependent on the ocr model
if spec.category in ('num_to_char', 'characters'):
continue
src: Path = ctx.obj.model_zoo.model_path(spec.category, spec.variant)
# Only copy the top-most directory relative to models_eynollah
while src.parent.name != 'models_eynollah':
src = src.parent
for dist in spec.dists:
dist_dir = Path(f"{output_dir}/models_{dist}_{version}/models_eynollah")
copies.add((src, dist_dir))
mkdirs.add(dist_dir)
for dir in mkdirs:
print(f"mkdir -p {dir}")
for (src, dst) in copies:
print(f"cp -r {src} {dst}")
for dir in mkdirs:
zip_path = Path(f'../{dir.parent.name}.zip')
print(f"(cd {dir}/..; zip -r {zip_path} models_eynollah)")

(File diff suppressed because it is too large.)


@@ -0,0 +1,998 @@
# pyright: reportPossiblyUnboundVariable=false
from logging import Logger, getLogger
from typing import Optional
from pathlib import Path
import os
import json
import gc
import sys
import math
import time
from keras.layers import StringLookup
import cv2
import xml.etree.ElementTree as ET
import tensorflow as tf
from keras.models import load_model
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from eynollah.model_zoo import EynollahModelZoo
try:
import torch
except ImportError:
torch = None
from .utils import is_image_filename
from .utils.resize import resize_image
from .utils.utils_ocr import (
break_curved_line_into_small_pieces_and_then_merge,
decode_batch_predictions,
fit_text_single_line,
get_contours_and_bounding_boxes,
get_orientation_moments,
preprocess_and_resize_image_for_ocrcnn_model,
return_textlines_split_if_needed,
rotate_image_with_padding,
)
# cannot use importlib.resources until we move to 3.9+ for importlib.resources.files
if sys.version_info < (3, 10):
import importlib_resources
else:
import importlib.resources as importlib_resources
try:
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
except ImportError:
TrOCRProcessor = VisionEncoderDecoderModel = None
class Eynollah_ocr:
def __init__(
self,
dir_models,
model_name=None,
dir_xmls=None,
tr_ocr=False,
batch_size: Optional[int]=None,
export_textline_images_and_text: bool=False,
do_not_mask_with_textline_contour: bool=False,
pref_of_dataset=None,
min_conf_value_of_textline_text : float=0.3,
logger: Optional[Logger]=None,
):
self.tr_ocr = tr_ocr
self.export_textline_images_and_text = export_textline_images_and_text
self.do_not_mask_with_textline_contour = do_not_mask_with_textline_contour
self.pref_of_dataset = pref_of_dataset
self.logger = logger if logger else getLogger('eynollah')
self.model_zoo = EynollahModelZoo(basedir=dir_models)
# TODO: Properly document what 'export_textline_images_and_text' is about
if export_textline_images_and_text:
self.logger.info("export_textline_images_and_text was set, so no actual models are loaded")
return
self.min_conf_value_of_textline_text = min_conf_value_of_textline_text
self.b_s = 2 if batch_size is None and tr_ocr else 8 if batch_size is None else batch_size
if tr_ocr:
self.model_zoo.load_model('trocr_processor', '')
if model_name:
self.model_zoo.load_model('ocr', 'tr', model_name)
else:
self.model_zoo.load_model('ocr', 'tr')
self.model_zoo.get('ocr').to(self.device)
else:
if model_name:
self.model_zoo.load_model('ocr', '', model_name)
else:
self.model_zoo.load_model('ocr', '')
self.model_zoo.load_model('num_to_char')
self.end_character = len(self.model_zoo.load_model('characters')) + 2
@property
def device(self):
if torch.cuda.is_available():
self.logger.info("Using GPU acceleration")
return torch.device("cuda:0")
else:
self.logger.info("Using CPU processing")
return torch.device("cpu")
def run(self, overwrite: bool = False,
dir_in: Optional[str] = None,
dir_in_bin: Optional[str] = None,
image_filename: Optional[str] = None,
dir_xmls: Optional[str] = None,
dir_out_image_text: Optional[str] = None,
dir_out: Optional[str] = None,
):
if dir_in:
ls_imgs = [os.path.join(dir_in, image_filename)
for image_filename in filter(is_image_filename,
os.listdir(dir_in))]
else:
assert image_filename
ls_imgs = [image_filename]
if self.tr_ocr:
tr_ocr_input_height_and_width = 384
for dir_img in ls_imgs:
file_name = Path(dir_img).stem
assert dir_xmls # FIXME: check the logic
dir_xml = os.path.join(dir_xmls, file_name+'.xml')
assert dir_out # FIXME: check the logic
out_file_ocr = os.path.join(dir_out, file_name+'.xml')
if os.path.exists(out_file_ocr):
if overwrite:
self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
else:
self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
continue
img = cv2.imread(dir_img)
if dir_out_image_text:
out_image_with_text = os.path.join(dir_out_image_text, file_name+'.png')
image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
draw = ImageDraw.Draw(image_text)
total_bb_coordinates = []
##file_name = Path(dir_xmls).stem
tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8"))
root1=tree1.getroot()
alltags=[elem.tag for elem in root1.iter()]
link=alltags[0].split('}')[0]+'}'
name_space = alltags[0].split('}')[0]
name_space = name_space.split('{')[1]
region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')])
cropped_lines = []
cropped_lines_region_indexer = []
cropped_lines_meging_indexing = []
extracted_texts = []
indexer_text_region = 0
indexer_b_s = 0
for nn in root1.iter(region_tags):
for child_textregion in nn:
if child_textregion.tag.endswith("TextLine"):
for child_textlines in child_textregion:
if child_textlines.tag.endswith("Coords"):
cropped_lines_region_indexer.append(indexer_text_region)
p_h=child_textlines.attrib['points'].split(' ')
textline_coords = np.array( [ [int(x.split(',')[0]),
int(x.split(',')[1]) ]
for x in p_h] )
x,y,w,h = cv2.boundingRect(textline_coords)
if dir_out_image_text:
total_bb_coordinates.append([x,y,w,h])
h2w_ratio = h/float(w)
img_poly_on_img = np.copy(img)
mask_poly = np.zeros(img.shape)
mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))
mask_poly = mask_poly[y:y+h, x:x+w, :]
img_crop = img_poly_on_img[y:y+h, x:x+w, :]
img_crop[mask_poly==0] = 255
self.logger.debug("processing %d lines for '%s'",
len(cropped_lines), nn.attrib['id'])
if h2w_ratio > 0.1:
cropped_lines.append(resize_image(img_crop,
tr_ocr_input_height_and_width,
tr_ocr_input_height_and_width) )
cropped_lines_meging_indexing.append(0)
indexer_b_s+=1
if indexer_b_s==self.b_s:
imgs = cropped_lines[:]
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_zoo.get('ocr').generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
else:
splited_images, _ = return_textlines_split_if_needed(img_crop, None)
#print(splited_images)
if splited_images:
cropped_lines.append(resize_image(splited_images[0],
tr_ocr_input_height_and_width,
tr_ocr_input_height_and_width))
cropped_lines_meging_indexing.append(1)
indexer_b_s+=1
if indexer_b_s==self.b_s:
imgs = cropped_lines[:]
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_zoo.get('ocr').generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
cropped_lines.append(resize_image(splited_images[1],
tr_ocr_input_height_and_width,
tr_ocr_input_height_and_width))
cropped_lines_meging_indexing.append(-1)
indexer_b_s+=1
if indexer_b_s==self.b_s:
imgs = cropped_lines[:]
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_zoo.get('ocr').generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
else:
cropped_lines.append(img_crop)
cropped_lines_meging_indexing.append(0)
indexer_b_s+=1
if indexer_b_s==self.b_s:
imgs = cropped_lines[:]
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_zoo.get('ocr').generate(
pixel_values_merged.to(self.device))
generated_text_merged = self.model_zoo.get('processor').batch_decode(
generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
indexer_text_region = indexer_text_region +1
if indexer_b_s!=0:
imgs = cropped_lines[:]
cropped_lines = []
indexer_b_s = 0
pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
generated_ids_merged = self.model_zoo.get('ocr').generate(pixel_values_merged.to(self.device))
generated_text_merged = self.model_zoo.get('processor').batch_decode(generated_ids_merged, skip_special_tokens=True)
extracted_texts = extracted_texts + generated_text_merged
####extracted_texts = []
####n_iterations = math.ceil(len(cropped_lines) / self.b_s)
####for i in range(n_iterations):
####if i==(n_iterations-1):
####n_start = i*self.b_s
####imgs = cropped_lines[n_start:]
####else:
####n_start = i*self.b_s
####n_end = (i+1)*self.b_s
####imgs = cropped_lines[n_start:n_end]
####pixel_values_merged = self.model_zoo.get('processor')(imgs, return_tensors="pt").pixel_values
####generated_ids_merged = self.model_ocr.generate(
#### pixel_values_merged.to(self.device))
####generated_text_merged = self.model_zoo.get('processor').batch_decode(
#### generated_ids_merged, skip_special_tokens=True)
####extracted_texts = extracted_texts + generated_text_merged
del cropped_lines
gc.collect()
extracted_texts_merged = [extracted_texts[ind]
if cropped_lines_meging_indexing[ind]==0
else extracted_texts[ind]+" "+extracted_texts[ind+1]
if cropped_lines_meging_indexing[ind]==1
else None
for ind in range(len(cropped_lines_meging_indexing))]
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
#print(extracted_texts_merged, len(extracted_texts_merged))
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
if dir_out_image_text:
#font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
with importlib_resources.as_file(font) as font:
font = ImageFont.truetype(font=font, size=40)
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
x_bb = bb_ind[0]
y_bb = bb_ind[1]
w_bb = bb_ind[2]
h_bb = bb_ind[3]
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
font.path, w_bb, int(h_bb*0.4) )
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
text_width = text_bbox[2] - text_bbox[0]
text_height = text_bbox[3] - text_bbox[1]
text_x = x_bb + (w_bb - text_width) // 2 # Center horizontally
text_y = y_bb + (h_bb - text_height) // 2 # Center vertically
# Draw the text
draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
image_text.save(out_image_with_text)
#print(len(unique_cropped_lines_region_indexer), 'unique_cropped_lines_region_indexer')
#######text_by_textregion = []
#######for ind in unique_cropped_lines_region_indexer:
#######ind = np.array(cropped_lines_region_indexer)==ind
#######extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
#######text_by_textregion.append(" ".join(extracted_texts_merged_un))
text_by_textregion = []
for ind in unique_cropped_lines_region_indexer:
ind = np.array(cropped_lines_region_indexer) == ind
extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
if len(extracted_texts_merged_un)>1:
text_by_textregion_ind = ""
next_glue = ""
for indt in range(len(extracted_texts_merged_un)):
if (extracted_texts_merged_un[indt].endswith('') or
extracted_texts_merged_un[indt].endswith('-') or
extracted_texts_merged_un[indt].endswith('¬')):
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
next_glue = ""
else:
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
next_glue = " "
text_by_textregion.append(text_by_textregion_ind)
else:
text_by_textregion.append(" ".join(extracted_texts_merged_un))
indexer = 0
indexer_textregion = 0
for nn in root1.iter(region_tags):
#id_textregion = nn.attrib['id']
#id_textregions.append(id_textregion)
#textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
is_textregion_text = False
for childtest in nn:
if childtest.tag.endswith("TextEquiv"):
is_textregion_text = True
if not is_textregion_text:
text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
has_textline = False
for child_textregion in nn:
if child_textregion.tag.endswith("TextLine"):
is_textline_text = False
for childtest2 in child_textregion:
if childtest2.tag.endswith("TextEquiv"):
is_textline_text = True
if not is_textline_text:
text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
##text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
unicode_textline = ET.SubElement(text_subelement, 'Unicode')
unicode_textline.text = extracted_texts_merged[indexer]
else:
for childtest3 in child_textregion:
if childtest3.tag.endswith("TextEquiv"):
for child_uc in childtest3:
if child_uc.tag.endswith("Unicode"):
##childtest3.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
child_uc.text = extracted_texts_merged[indexer]
indexer = indexer + 1
has_textline = True
if has_textline:
if is_textregion_text:
for child4 in nn:
if child4.tag.endswith("TextEquiv"):
for childtr_uc in child4:
if childtr_uc.tag.endswith("Unicode"):
childtr_uc.text = text_by_textregion[indexer_textregion]
else:
unicode_textregion.text = text_by_textregion[indexer_textregion]
indexer_textregion = indexer_textregion + 1
###sample_order = [(id_to_order[tid], text)
### for tid, text in zip(id_textregions, textregions_by_existing_ids)
### if tid in id_to_order]
##ordered_texts_sample = [text for _, text in sorted(sample_order)]
##tot_page_text = ' '.join(ordered_texts_sample)
##for page_element in root1.iter(link+'Page'):
##text_page = ET.SubElement(page_element, 'TextEquiv')
##unicode_textpage = ET.SubElement(text_page, 'Unicode')
##unicode_textpage.text = tot_page_text
ET.register_namespace("",name_space)
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
else:
###max_len = 280#512#280#512
###padding_token = 1500#299#1500#299
image_width = 512#max_len * 4
image_height = 32
img_size=(image_width, image_height)
for dir_img in ls_imgs:
file_name = Path(dir_img).stem
dir_xml = os.path.join(dir_xmls, file_name+'.xml')
out_file_ocr = os.path.join(dir_out, file_name+'.xml')
if os.path.exists(out_file_ocr):
if overwrite:
self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
else:
self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
continue
img = cv2.imread(dir_img)
if dir_in_bin is not None:
cropped_lines_bin = []
dir_img_bin = os.path.join(dir_in_bin, file_name+'.png')
img_bin = cv2.imread(dir_img_bin)
if dir_out_image_text:
out_image_with_text = os.path.join(dir_out_image_text, file_name+'.png')
image_text = Image.new("RGB", (img.shape[1], img.shape[0]), "white")
draw = ImageDraw.Draw(image_text)
total_bb_coordinates = []
tree1 = ET.parse(dir_xml, parser = ET.XMLParser(encoding="utf-8"))
root1=tree1.getroot()
alltags=[elem.tag for elem in root1.iter()]
link=alltags[0].split('}')[0]+'}'
name_space = alltags[0].split('}')[0]
name_space = name_space.split('{')[1]
region_tags=np.unique([x for x in alltags if x.endswith('TextRegion')])
cropped_lines = []
cropped_lines_ver_index = []
cropped_lines_region_indexer = []
cropped_lines_meging_indexing = []
tinl = time.time()
indexer_text_region = 0
indexer_textlines = 0
for nn in root1.iter(region_tags):
try:
type_textregion = nn.attrib['type']
except:
type_textregion = 'paragraph'
for child_textregion in nn:
if child_textregion.tag.endswith("TextLine"):
for child_textlines in child_textregion:
if child_textlines.tag.endswith("Coords"):
cropped_lines_region_indexer.append(indexer_text_region)
p_h=child_textlines.attrib['points'].split(' ')
textline_coords = np.array( [ [int(x.split(',')[0]),
int(x.split(',')[1]) ]
for x in p_h] )
x,y,w,h = cv2.boundingRect(textline_coords)
angle_radians = math.atan2(h, w)
# Convert to degrees
angle_degrees = math.degrees(angle_radians)
if type_textregion=='drop-capital':
angle_degrees = 0
if dir_out_image_text:
total_bb_coordinates.append([x,y,w,h])
w_scaled = w * image_height/float(h)
img_poly_on_img = np.copy(img)
if dir_in_bin is not None:
img_poly_on_img_bin = np.copy(img_bin)
img_crop_bin = img_poly_on_img_bin[y:y+h, x:x+w, :]
mask_poly = np.zeros(img.shape)
mask_poly = cv2.fillPoly(mask_poly, pts=[textline_coords], color=(1, 1, 1))
mask_poly = mask_poly[y:y+h, x:x+w, :]
img_crop = img_poly_on_img[y:y+h, x:x+w, :]
if self.export_textline_images_and_text:
if not self.do_not_mask_with_textline_contour:
img_crop[mask_poly==0] = 255
else:
# print(file_name, angle_degrees, w*h,
# mask_poly[:,:,0].sum(),
# mask_poly[:,:,0].sum() /float(w*h) ,
# 'didi')
if angle_degrees > 3:
better_des_slope = get_orientation_moments(textline_coords)
img_crop = rotate_image_with_padding(img_crop, better_des_slope)
if dir_in_bin is not None:
img_crop_bin = rotate_image_with_padding(img_crop_bin, better_des_slope)
mask_poly = rotate_image_with_padding(mask_poly, better_des_slope)
mask_poly = mask_poly.astype('uint8')
#new bounding box
x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_poly[:,:,0])
mask_poly = mask_poly[y_n:y_n+h_n, x_n:x_n+w_n, :]
img_crop = img_crop[y_n:y_n+h_n, x_n:x_n+w_n, :]
if not self.do_not_mask_with_textline_contour:
img_crop[mask_poly==0] = 255
if dir_in_bin is not None:
img_crop_bin = img_crop_bin[y_n:y_n+h_n, x_n:x_n+w_n, :]
if not self.do_not_mask_with_textline_contour:
img_crop_bin[mask_poly==0] = 255
if mask_poly[:,:,0].sum() /float(w_n*h_n) < 0.50 and w_scaled > 90:
if dir_in_bin is not None:
img_crop, img_crop_bin = \
break_curved_line_into_small_pieces_and_then_merge(
img_crop, mask_poly, img_crop_bin)
else:
img_crop, _ = \
break_curved_line_into_small_pieces_and_then_merge(
img_crop, mask_poly)
else:
better_des_slope = 0
if not self.do_not_mask_with_textline_contour:
img_crop[mask_poly==0] = 255
if dir_in_bin is not None:
if not self.do_not_mask_with_textline_contour:
img_crop_bin[mask_poly==0] = 255
if type_textregion=='drop-capital':
pass
else:
if mask_poly[:,:,0].sum() /float(w*h) < 0.50 and w_scaled > 90:
if dir_in_bin is not None:
img_crop, img_crop_bin = \
break_curved_line_into_small_pieces_and_then_merge(
img_crop, mask_poly, img_crop_bin)
else:
img_crop, _ = \
break_curved_line_into_small_pieces_and_then_merge(
img_crop, mask_poly)
if not self.export_textline_images_and_text:
if w_scaled < 750:#1.5*image_width:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
img_crop, image_height, image_width)
cropped_lines.append(img_fin)
if abs(better_des_slope) > 45:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
cropped_lines_meging_indexing.append(0)
if dir_in_bin is not None:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
img_crop_bin, image_height, image_width)
cropped_lines_bin.append(img_fin)
else:
splited_images, splited_images_bin = return_textlines_split_if_needed(
img_crop, img_crop_bin if dir_in_bin is not None else None)
if splited_images:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
splited_images[0], image_height, image_width)
cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(1)
if abs(better_des_slope) > 45:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
splited_images[1], image_height, image_width)
cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(-1)
if abs(better_des_slope) > 45:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
if dir_in_bin is not None:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
splited_images_bin[0], image_height, image_width)
cropped_lines_bin.append(img_fin)
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
splited_images_bin[1], image_height, image_width)
cropped_lines_bin.append(img_fin)
else:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
img_crop, image_height, image_width)
cropped_lines.append(img_fin)
cropped_lines_meging_indexing.append(0)
if abs(better_des_slope) > 45:
cropped_lines_ver_index.append(1)
else:
cropped_lines_ver_index.append(0)
if dir_in_bin is not None:
img_fin = preprocess_and_resize_image_for_ocrcnn_model(
img_crop_bin, image_height, image_width)
cropped_lines_bin.append(img_fin)
if self.export_textline_images_and_text:
if img_crop.shape[0]==0 or img_crop.shape[1]==0:
pass
else:
if child_textlines.tag.endswith("TextEquiv"):
for cheild_text in child_textlines:
if cheild_text.tag.endswith("Unicode"):
textline_text = cheild_text.text
if textline_text:
base_name = os.path.join(
dir_out, file_name + '_line_' + str(indexer_textlines))
if self.pref_of_dataset:
base_name += '_' + self.pref_of_dataset
if not self.do_not_mask_with_textline_contour:
base_name += '_masked'
with open(base_name + '.txt', 'w') as text_file:
text_file.write(textline_text)
cv2.imwrite(base_name + '.png', img_crop)
indexer_textlines+=1
if not self.export_textline_images_and_text:
indexer_text_region = indexer_text_region +1
if not self.export_textline_images_and_text:
extracted_texts = []
extracted_conf_value = []
n_iterations = math.ceil(len(cropped_lines) / self.b_s)
for i in range(n_iterations):
if i==(n_iterations-1):
n_start = i*self.b_s
imgs = cropped_lines[n_start:]
imgs = np.array(imgs)
imgs = imgs.reshape(imgs.shape[0], image_height, image_width, 3)
ver_imgs = np.array( cropped_lines_ver_index[n_start:] )
indices_ver = np.where(ver_imgs == 1)[0]
#print(indices_ver, 'indices_ver')
if len(indices_ver)>0:
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_ver_flipped = None
if dir_in_bin is not None:
imgs_bin = cropped_lines_bin[n_start:]
imgs_bin = np.array(imgs_bin)
imgs_bin = imgs_bin.reshape(imgs_bin.shape[0], image_height, image_width, 3)
if len(indices_ver)>0:
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_bin_ver_flipped = None
else:
n_start = i*self.b_s
n_end = (i+1)*self.b_s
imgs = cropped_lines[n_start:n_end]
imgs = np.array(imgs).reshape(self.b_s, image_height, image_width, 3)
ver_imgs = np.array( cropped_lines_ver_index[n_start:n_end] )
indices_ver = np.where(ver_imgs == 1)[0]
#print(indices_ver, 'indices_ver')
if len(indices_ver)>0:
imgs_ver_flipped = imgs[indices_ver, : ,: ,:]
imgs_ver_flipped = imgs_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_ver_flipped = None
if dir_in_bin is not None:
imgs_bin = cropped_lines_bin[n_start:n_end]
imgs_bin = np.array(imgs_bin).reshape(self.b_s, image_height, image_width, 3)
if len(indices_ver)>0:
imgs_bin_ver_flipped = imgs_bin[indices_ver, : ,: ,:]
imgs_bin_ver_flipped = imgs_bin_ver_flipped[:,::-1,::-1,:]
#print(imgs_ver_flipped, 'imgs_ver_flipped')
else:
imgs_bin_ver_flipped = None
self.logger.debug("processing next %d lines", len(imgs))
preds = self.model_zoo.get('ocr').predict(imgs, verbose=0)
if len(indices_ver)>0:
preds_flipped = self.model_zoo.get('ocr').predict(imgs_ver_flipped, verbose=0)
preds_max_fliped = np.max(preds_flipped, axis=2 )
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
masked_means_flipped = \
np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
preds_max = np.max(preds, axis=2 )
preds_max_args = np.argmax(preds, axis=2 )
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
masked_means = \
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
np.sum(pred_max_not_unk_mask_bool, axis=1)
masked_means[np.isnan(masked_means)] = 0
masked_means_ver = masked_means[indices_ver]
#print(masked_means_ver, 'pred_max_not_unk')
indices_where_flipped_conf_value_is_higher = \
np.where(masked_means_flipped > masked_means_ver)[0]
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
if len(indices_where_flipped_conf_value_is_higher)>0:
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
preds[indices_to_be_replaced,:,:] = \
preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
if dir_in_bin is not None:
preds_bin = self.model_zoo.get('ocr').predict(imgs_bin, verbose=0)
if len(indices_ver)>0:
preds_flipped = self.model_zoo.get('ocr').predict(imgs_bin_ver_flipped, verbose=0)
preds_max_fliped = np.max(preds_flipped, axis=2 )
preds_max_args_flipped = np.argmax(preds_flipped, axis=2 )
pred_max_not_unk_mask_bool_flipped = preds_max_args_flipped[:,:]!=self.end_character
masked_means_flipped = \
np.sum(preds_max_fliped * pred_max_not_unk_mask_bool_flipped, axis=1) / \
np.sum(pred_max_not_unk_mask_bool_flipped, axis=1)
masked_means_flipped[np.isnan(masked_means_flipped)] = 0
preds_max = np.max(preds, axis=2 )
preds_max_args = np.argmax(preds, axis=2 )
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
masked_means = \
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
np.sum(pred_max_not_unk_mask_bool, axis=1)
masked_means[np.isnan(masked_means)] = 0
masked_means_ver = masked_means[indices_ver]
#print(masked_means_ver, 'pred_max_not_unk')
indices_where_flipped_conf_value_is_higher = \
np.where(masked_means_flipped > masked_means_ver)[0]
#print(indices_where_flipped_conf_value_is_higher, 'indices_where_flipped_conf_value_is_higher')
if len(indices_where_flipped_conf_value_is_higher)>0:
indices_to_be_replaced = indices_ver[indices_where_flipped_conf_value_is_higher]
preds_bin[indices_to_be_replaced,:,:] = \
preds_flipped[indices_where_flipped_conf_value_is_higher, :, :]
preds = (preds + preds_bin) / 2.
pred_texts = decode_batch_predictions(preds, self.model_zoo.get('num_to_char'))
preds_max = np.max(preds, axis=2 )
preds_max_args = np.argmax(preds, axis=2 )
pred_max_not_unk_mask_bool = preds_max_args[:,:]!=self.end_character
masked_means = \
np.sum(preds_max * pred_max_not_unk_mask_bool, axis=1) / \
np.sum(pred_max_not_unk_mask_bool, axis=1)
for ib in range(imgs.shape[0]):
pred_texts_ib = pred_texts[ib].replace("[UNK]", "")
if masked_means[ib] >= self.min_conf_value_of_textline_text:
extracted_texts.append(pred_texts_ib)
extracted_conf_value.append(masked_means[ib])
else:
extracted_texts.append("")
extracted_conf_value.append(0)
del cropped_lines
if dir_in_bin is not None:
del cropped_lines_bin
gc.collect()
extracted_texts_merged = [extracted_texts[ind]
if cropped_lines_meging_indexing[ind]==0
else extracted_texts[ind]+" "+extracted_texts[ind+1]
if cropped_lines_meging_indexing[ind]==1
else None
for ind in range(len(cropped_lines_meging_indexing))]
extracted_conf_value_merged = [extracted_conf_value[ind]
if cropped_lines_meging_indexing[ind]==0
else (extracted_conf_value[ind]+extracted_conf_value[ind+1])/2.
if cropped_lines_meging_indexing[ind]==1
else None
for ind in range(len(cropped_lines_meging_indexing))]
extracted_conf_value_merged = [extracted_conf_value_merged[ind_cfm]
for ind_cfm in range(len(extracted_texts_merged))
if extracted_texts_merged[ind_cfm] is not None]
extracted_texts_merged = [ind for ind in extracted_texts_merged if ind is not None]
unique_cropped_lines_region_indexer = np.unique(cropped_lines_region_indexer)
if dir_out_image_text:
#font_path = "Charis-7.000/Charis-Regular.ttf" # Make sure this file exists!
font = importlib_resources.files(__package__) / "Charis-Regular.ttf"
with importlib_resources.as_file(font) as font:
font = ImageFont.truetype(font=font, size=40)
for indexer_text, bb_ind in enumerate(total_bb_coordinates):
x_bb = bb_ind[0]
y_bb = bb_ind[1]
w_bb = bb_ind[2]
h_bb = bb_ind[3]
font = fit_text_single_line(draw, extracted_texts_merged[indexer_text],
font.path, w_bb, int(h_bb*0.4) )
##draw.rectangle([x_bb, y_bb, x_bb + w_bb, y_bb + h_bb], outline="red", width=2)
text_bbox = draw.textbbox((0, 0), extracted_texts_merged[indexer_text], font=font)
text_width = text_bbox[2] - text_bbox[0]
text_height = text_bbox[3] - text_bbox[1]
text_x = x_bb + (w_bb - text_width) // 2 # Center horizontally
text_y = y_bb + (h_bb - text_height) // 2 # Center vertically
# Draw the text
draw.text((text_x, text_y), extracted_texts_merged[indexer_text], fill="black", font=font)
image_text.save(out_image_with_text)
text_by_textregion = []
for ind in unique_cropped_lines_region_indexer:
ind = np.array(cropped_lines_region_indexer)==ind
extracted_texts_merged_un = np.array(extracted_texts_merged)[ind]
if len(extracted_texts_merged_un)>1:
text_by_textregion_ind = ""
next_glue = ""
for indt in range(len(extracted_texts_merged_un)):
if (extracted_texts_merged_un[indt].endswith('') or
extracted_texts_merged_un[indt].endswith('-') or
extracted_texts_merged_un[indt].endswith('¬')):
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt][:-1]
next_glue = ""
else:
text_by_textregion_ind += next_glue + extracted_texts_merged_un[indt]
next_glue = " "
text_by_textregion.append(text_by_textregion_ind)
else:
text_by_textregion.append(" ".join(extracted_texts_merged_un))
#print(text_by_textregion, 'text_by_textregiontext_by_textregiontext_by_textregiontext_by_textregiontext_by_textregion')
###index_tot_regions = []
###tot_region_ref = []
###for jj in root1.iter(link+'RegionRefIndexed'):
###index_tot_regions.append(jj.attrib['index'])
###tot_region_ref.append(jj.attrib['regionRef'])
###id_to_order = {tid: ro for tid, ro in zip(tot_region_ref, index_tot_regions)}
#id_textregions = []
#textregions_by_existing_ids = []
indexer = 0
indexer_textregion = 0
for nn in root1.iter(region_tags):
#id_textregion = nn.attrib['id']
#id_textregions.append(id_textregion)
#textregions_by_existing_ids.append(text_by_textregion[indexer_textregion])
is_textregion_text = False
for childtest in nn:
if childtest.tag.endswith("TextEquiv"):
is_textregion_text = True
if not is_textregion_text:
text_subelement_textregion = ET.SubElement(nn, 'TextEquiv')
unicode_textregion = ET.SubElement(text_subelement_textregion, 'Unicode')
has_textline = False
for child_textregion in nn:
if child_textregion.tag.endswith("TextLine"):
is_textline_text = False
for childtest2 in child_textregion:
if childtest2.tag.endswith("TextEquiv"):
is_textline_text = True
if not is_textline_text:
text_subelement = ET.SubElement(child_textregion, 'TextEquiv')
text_subelement.set('conf', f"{extracted_conf_value_merged[indexer]:.2f}")
unicode_textline = ET.SubElement(text_subelement, 'Unicode')
unicode_textline.text = extracted_texts_merged[indexer]
else:
for childtest3 in child_textregion:
if childtest3.tag.endswith("TextEquiv"):
for child_uc in childtest3:
if child_uc.tag.endswith("Unicode"):
childtest3.set('conf',
f"{extracted_conf_value_merged[indexer]:.2f}")
child_uc.text = extracted_texts_merged[indexer]
indexer = indexer + 1
has_textline = True
if has_textline:
if is_textregion_text:
for child4 in nn:
if child4.tag.endswith("TextEquiv"):
for childtr_uc in child4:
if childtr_uc.tag.endswith("Unicode"):
childtr_uc.text = text_by_textregion[indexer_textregion]
else:
unicode_textregion.text = text_by_textregion[indexer_textregion]
indexer_textregion = indexer_textregion + 1
###sample_order = [(id_to_order[tid], text)
### for tid, text in zip(id_textregions, textregions_by_existing_ids)
### if tid in id_to_order]
##ordered_texts_sample = [text for _, text in sorted(sample_order)]
##tot_page_text = ' '.join(ordered_texts_sample)
##for page_element in root1.iter(link+'Page'):
##text_page = ET.SubElement(page_element, 'TextEquiv')
##unicode_textpage = ET.SubElement(text_page, 'Unicode')
##unicode_textpage.text = tot_page_text
ET.register_namespace("",name_space)
tree1.write(out_file_ocr,xml_declaration=True,method='xml',encoding="utf-8",default_namespace=None)
#print("Job done in %.1fs", time.time() - t0)


@@ -5,24 +5,25 @@ Image enhancer. The output can be written as same scale of input or in new predi
 from logging import Logger
 import os
 import time
-from typing import Optional
+from typing import Dict, Optional
 from pathlib import Path
 import gc
 import cv2
+from keras.models import Model
 import numpy as np
 from ocrd_utils import getLogger, tf_disable_interactive_logs
 import tensorflow as tf
 from skimage.morphology import skeletonize
-from tensorflow.keras.models import load_model

+from .model_zoo import EynollahModelZoo
 from .utils.resize import resize_image
 from .utils.pil_cv2 import pil2cv
 from .utils import (
     is_image_filename,
     crop_image_inside_box
 )
-from .eynollah import PatchEncoder, Patches
+from .patch_encoder import PatchEncoder, Patches

 DPI_THRESHOLD = 298
 KERNEL = np.ones((5, 5), np.uint8)
@@ -50,11 +51,9 @@ class Enhancer:
         self.num_col_lower = num_col_lower
         self.logger = logger if logger else getLogger('enhancement')
-        self.dir_models = dir_models
-        self.model_dir_of_binarization = dir_models + "/eynollah-binarization_20210425"
-        self.model_dir_of_enhancement = dir_models + "/eynollah-enhancement_20210425"
-        self.model_dir_of_col_classifier = dir_models + "/eynollah-column-classifier_20210425"
-        self.model_page_dir = dir_models + "/model_eynollah_page_extraction_20250915"
+        self.model_zoo = EynollahModelZoo(basedir=dir_models)
+        for v in ['binarization', 'enhancement', 'col_classifier', 'page']:
+            self.model_zoo.load_model(v)

         try:
             for device in tf.config.list_physical_devices('GPU'):
@@ -62,11 +61,6 @@ class Enhancer:
         except:
             self.logger.warning("no GPU device available")

-        self.model_page = self.our_load_model(self.model_page_dir)
-        self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
-        self.model_enhancement = self.our_load_model(self.model_dir_of_enhancement)
-        self.model_bin = self.our_load_model(self.model_dir_of_binarization)
-
     def cache_images(self, image_filename=None, image_pil=None, dpi=None):
         ret = {}
         if image_filename:
@@ -103,23 +97,11 @@ class Enhancer:
     def isNaN(self, num):
         return num != num

-    @staticmethod
-    def our_load_model(model_file):
-        if model_file.endswith('.h5') and Path(model_file[:-3]).exists():
-            # prefer SavedModel over HDF5 format if it exists
-            model_file = model_file[:-3]
-        try:
-            model = load_model(model_file, compile=False)
-        except:
-            model = load_model(model_file, compile=False, custom_objects={
-                "PatchEncoder": PatchEncoder, "Patches": Patches})
-        return model
-
     def predict_enhancement(self, img):
         self.logger.debug("enter predict_enhancement")
-        img_height_model = self.model_enhancement.layers[-1].output_shape[1]
-        img_width_model = self.model_enhancement.layers[-1].output_shape[2]
+        img_height_model = self.model_zoo.get('enhancement', Model).layers[-1].output_shape[1]
+        img_width_model = self.model_zoo.get('enhancement', Model).layers[-1].output_shape[2]
         if img.shape[0] < img_height_model:
             img = cv2.resize(img, (img.shape[1], img_width_model), interpolation=cv2.INTER_NEAREST)
         if img.shape[1] < img_width_model:
@@ -160,7 +142,7 @@ class Enhancer:
                     index_y_d = img_h - img_height_model
                 img_patch = img[np.newaxis, index_y_d:index_y_u, index_x_d:index_x_u, :]
-                label_p_pred = self.model_enhancement.predict(img_patch, verbose=0)
+                label_p_pred = self.model_zoo.get('enhancement', Model).predict(img_patch, verbose=0)
                 seg = label_p_pred[0, :, :, :] * 255

                 if i == 0 and j == 0:
@@ -246,7 +228,7 @@ class Enhancer:
         else:
             img = self.imread()
         img = cv2.GaussianBlur(img, (5, 5), 0)
-        img_page_prediction = self.do_prediction(False, img, self.model_page)
+        img_page_prediction = self.do_prediction(False, img, self.model_zoo.get('page'))
         imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
         _, thresh = cv2.threshold(imgray, 0, 255, 0)
@@ -291,7 +273,7 @@ class Enhancer:
         self.logger.info("Detected %s DPI", dpi)
         if self.input_binary:
             img = self.imread()
-            prediction_bin = self.do_prediction(True, img, self.model_bin, n_batch_inference=5)
+            prediction_bin = self.do_prediction(True, img, self.model_zoo.get('binarization'), n_batch_inference=5)
             prediction_bin = 255 * (prediction_bin[:,:,0]==0)
             prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2).astype(np.uint8)
             img= np.copy(prediction_bin)
@@ -332,7 +314,7 @@ class Enhancer:
             img_in[0, :, :, 1] = img_1ch[:, :]
             img_in[0, :, :, 2] = img_1ch[:, :]
-            label_p_pred = self.model_classifier.predict(img_in, verbose=0)
+            label_p_pred = self.model_zoo.get('col_classifier').predict(img_in, verbose=0)
             num_col = np.argmax(label_p_pred[0]) + 1
         elif (self.num_col_upper and self.num_col_lower) and (self.num_col_upper!=self.num_col_lower):
             if self.input_binary:
@@ -352,7 +334,7 @@ class Enhancer:
             img_in[0, :, :, 1] = img_1ch[:, :]
             img_in[0, :, :, 2] = img_1ch[:, :]
-            label_p_pred = self.model_classifier.predict(img_in, verbose=0)
+            label_p_pred = self.model_zoo.get('col_classifier').predict(img_in, verbose=0)
             num_col = np.argmax(label_p_pred[0]) + 1
         if num_col > self.num_col_upper:
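
The same zoo pattern now recurs across the runners: construct one EynollahModelZoo, load_model() the categories a runner needs, then get() them back, optionally type-narrowed (e.g. to keras.models.Model for .layers/.predict). A minimal sketch, assuming default variants and a placeholder models directory:

    # Hedged sketch of the shared model-zoo pattern introduced in this commit.
    from keras.models import Model
    from eynollah.model_zoo import EynollahModelZoo

    zoo = EynollahModelZoo(basedir="/path/to/models")
    for category in ["binarization", "enhancement", "col_classifier", "page"]:
        zoo.load_model(category)                  # default ('') variant
    enhancement = zoo.get("enhancement", Model)   # narrowed for .layers / .predict
    print(enhancement.layers[-1].output_shape)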


@@ -10,12 +10,13 @@ from pathlib import Path
import xml.etree.ElementTree as ET
import cv2
+from keras.models import Model
import numpy as np
from ocrd_utils import getLogger
import statistics
import tensorflow as tf
-from tensorflow.keras.models import load_model
+from .model_zoo import EynollahModelZoo
from .utils.resize import resize_image
from .utils.contour import (
    find_new_features_of_contours,
@@ -23,7 +24,6 @@ from .utils.contour import (
    return_parent_contours,
)
from .utils import is_xml_filename
-from .eynollah import PatchEncoder, Patches

DPI_THRESHOLD = 298
KERNEL = np.ones((5, 5), np.uint8)
@@ -45,21 +45,11 @@ class machine_based_reading_order_on_layout:
        except:
            self.logger.warning("no GPU device available")
-        self.model_reading_order = self.our_load_model(self.model_reading_order_dir)
+        self.model_zoo = EynollahModelZoo(basedir=dir_models)
+        self.model_zoo.load_model('reading_order')
+        # FIXME: light_version is always true, no need for checks in the code
        self.light_version = True

-    @staticmethod
-    def our_load_model(model_file):
-        if model_file.endswith('.h5') and Path(model_file[:-3]).exists():
-            # prefer SavedModel over HDF5 format if it exists
-            model_file = model_file[:-3]
-        try:
-            model = load_model(model_file, compile=False)
-        except:
-            model = load_model(model_file, compile=False, custom_objects={
-                "PatchEncoder": PatchEncoder, "Patches": Patches})
-        return model
-
    def read_xml(self, xml_file):
        tree1 = ET.parse(xml_file, parser = ET.XMLParser(encoding='utf-8'))
        root1=tree1.getroot()
@@ -69,6 +59,7 @@ class machine_based_reading_order_on_layout:
        index_tot_regions = []
        tot_region_ref = []
+        y_len, x_len = 0, 0
        for jj in root1.iter(link+'Page'):
            y_len=int(jj.attrib['imageHeight'])
            x_len=int(jj.attrib['imageWidth'])
@@ -81,13 +72,13 @@ class machine_based_reading_order_on_layout:
        co_printspace = []
        if link+'PrintSpace' in alltags:
            region_tags_printspace = np.unique([x for x in alltags if x.endswith('PrintSpace')])
-        elif link+'Border' in alltags:
+        else:
            region_tags_printspace = np.unique([x for x in alltags if x.endswith('Border')])
        for tag in region_tags_printspace:
            if link+'PrintSpace' in alltags:
                tag_endings_printspace = ['}PrintSpace','}printspace']
-            elif link+'Border' in alltags:
+            else:
                tag_endings_printspace = ['}Border','}border']
            if tag.endswith(tag_endings_printspace[0]) or tag.endswith(tag_endings_printspace[1]):
@@ -683,7 +674,7 @@ class machine_based_reading_order_on_layout:
                    tot_counter += 1
                    batch.append(j)
                    if tot_counter % inference_bs == 0 or tot_counter == len(ij_list):
-                        y_pr = self.model_reading_order.predict(input_1 , verbose=0)
+                        y_pr = self.model_zoo.get('reading_order', Model).predict(input_1, verbose=0)
                        for jb, j in enumerate(batch):
                            if y_pr[jb][0]>=0.5:
                                post_list.append(j)
@@ -802,6 +793,7 @@ class machine_based_reading_order_on_layout:
        alltags=[elem.tag for elem in root_xml.iter()]
        ET.register_namespace("",name_space)
+        assert dir_out
        tree_xml.write(os.path.join(dir_out, file_name+'.xml'),
                       xml_declaration=True,
                       method='xml',

View file

@@ -0,0 +1,4 @@
__all__ = [
    'EynollahModelZoo',
]
from .model_zoo import EynollahModelZoo

View file

@@ -0,0 +1,314 @@
from .specs import EynollahModelSpec, EynollahModelSpecSet
from .types import KerasModel, TrOCRProcessor, List

# NOTE: This needs to change whenever models/versions change
ZENODO = "https://zenodo.org/records/17295988/files"
MODELS_VERSION = "v0_7_0"

def dist_url(dist_name: str) -> str:
    return f'{ZENODO}/models_{dist_name}_{MODELS_VERSION}.zip'

DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
    EynollahModelSpec(
        category="enhancement",
        variant='',
        filename="models_eynollah/eynollah-enhancement_20210425",
        dists=['enhancement', 'layout'],
        dist_url=dist_url("enhancement"),
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="binarization",
        variant='',
        filename="models_eynollah/eynollah-binarization-hybrid_20230504",
        dists=['layout', 'binarization'],
        dist_url=dist_url("binarization"),
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="binarization",
        variant='20210309',
        filename="models_eynollah/eynollah-binarization_20210309",
        dists=['binarization'],
        dist_url=dist_url("binarization"),
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="binarization",
        variant='augment',
        filename="models_eynollah/eynollah-binarization_20210425",
        dists=['binarization'],
        dist_url=dist_url("binarization"),
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="binarization_multi_1",
        variant='',
        filename="models_eynollah/eynollah-binarization-multi_2020_01_16/model_bin1",
        dist_url=dist_url("binarization"),
        dists=['binarization'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="binarization_multi_2",
        variant='',
        filename="models_eynollah/eynollah-binarization-multi_2020_01_16/model_bin2",
        dist_url=dist_url("binarization"),
        dists=['binarization'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="binarization_multi_3",
        variant='',
        filename="models_eynollah/eynollah-binarization-multi_2020_01_16/model_bin3",
        dist_url=dist_url("binarization"),
        dists=['binarization'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="binarization_multi_4",
        variant='',
        filename="models_eynollah/eynollah-binarization-multi_2020_01_16/model_bin4",
        dist_url=dist_url("binarization"),
        dists=['binarization'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="col_classifier",
        variant='',
        filename="models_eynollah/eynollah-column-classifier_20210425",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="page",
        variant='',
        filename="models_eynollah/model_eynollah_page_extraction_20250915",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="region",
        variant='',
        filename="models_eynollah/eynollah-main-regions-ensembled_20210425",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="region",
        variant='extract_only_images',
        filename="models_eynollah/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="region",
        variant='light',
        filename="models_eynollah/eynollah-main-regions_20220314",
        dist_url=dist_url("layout"),
        help="early layout",
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="region_p2",
        variant='',
        filename="models_eynollah/eynollah-main-regions-aug-rotation_20210425",
        dist_url=dist_url("layout"),
        help="early layout, non-light, 2nd part",
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="region_1_2",
        variant='',
        #filename="models_eynollah/modelens_12sp_elay_0_3_4__3_6_n",
        #filename="models_eynollah/modelens_earlylayout_12spaltige_2_3_5_6_7_8",
        #filename="models_eynollah/modelens_early12_sp_2_3_5_6_7_8_9_10_12_14_15_16_18",
        #filename="models_eynollah/modelens_1_2_4_5_early_lay_1_2_spaltige",
        #filename="models_eynollah/model_3_eraly_layout_no_patches_1_2_spaltige",
        filename="models_eynollah/modelens_e_l_all_sp_0_1_2_3_4_171024",
        dist_url=dist_url("layout"),
        dists=['layout'],
        help="early layout, light, 1-or-2-column",
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="region_fl_np",
        variant='',
        #filename="models_eynollah/modelens_full_lay_1_3_031124",
        #filename="models_eynollah/modelens_full_lay_13__3_19_241024",
        #filename="models_eynollah/model_full_lay_13_241024",
        #filename="models_eynollah/modelens_full_lay_13_17_231024",
        #filename="models_eynollah/modelens_full_lay_1_2_221024",
        #filename="models_eynollah/eynollah-full-regions-1column_20210425",
        filename="models_eynollah/modelens_full_lay_1__4_3_091124",
        dist_url=dist_url("layout"),
        help="full layout / no patches",
        dists=['layout'],
        type=KerasModel,
    ),
    # FIXME: Why is region_fl and region_fl_np the same model?
    EynollahModelSpec(
        category="region_fl",
        variant='',
        # filename="models_eynollah/eynollah-full-regions-3+column_20210425",
        # filename="models_eynollah/model_2_full_layout_new_trans",
        # filename="models_eynollah/modelens_full_lay_1_3_031124",
        # filename="models_eynollah/modelens_full_lay_13__3_19_241024",
        # filename="models_eynollah/model_full_lay_13_241024",
        # filename="models_eynollah/modelens_full_lay_13_17_231024",
        # filename="models_eynollah/modelens_full_lay_1_2_221024",
        # filename="models_eynollah/modelens_full_layout_24_till_28",
        # filename="models_eynollah/model_2_full_layout_new_trans",
        filename="models_eynollah/modelens_full_lay_1__4_3_091124",
        dist_url=dist_url("layout"),
        help="full layout / with patches",
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="reading_order",
        variant='',
        #filename="models_eynollah/model_mb_ro_aug_ens_11",
        #filename="models_eynollah/model_step_3200000_mb_ro",
        #filename="models_eynollah/model_ens_reading_order_machine_based",
        #filename="models_eynollah/model_mb_ro_aug_ens_8",
        #filename="models_eynollah/model_ens_reading_order_machine_based",
        filename="models_eynollah/model_eynollah_reading_order_20250824",
        dist_url=dist_url("reading_order"),
        dists=['layout', 'reading_order'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="textline",
        variant='',
        #filename="models_eynollah/modelens_textline_1_4_16092024",
        #filename="models_eynollah/model_textline_ens_3_4_5_6_artificial",
        #filename="models_eynollah/modelens_textline_1_3_4_20240915",
        #filename="models_eynollah/model_textline_ens_3_4_5_6_artificial",
        #filename="models_eynollah/modelens_textline_9_12_13_14_15",
        #filename="models_eynollah/eynollah-textline_20210425",
        filename="models_eynollah/modelens_textline_0_1__2_4_16092024",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="textline",
        variant='light',
        #filename="models_eynollah/eynollah-textline_light_20210425",
        filename="models_eynollah/modelens_textline_0_1__2_4_16092024",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="table",
        variant='',
        filename="models_eynollah/eynollah-tables_20210319",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="table",
        variant='light',
        filename="models_eynollah/modelens_table_0t4_201124",
        dist_url=dist_url("layout"),
        dists=['layout'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="ocr",
        variant='',
        filename="models_eynollah/model_eynollah_ocr_cnnrnn_20250930",
        dist_url=dist_url("ocr"),
        dists=['layout', 'ocr'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="ocr",
        variant='degraded',
        filename="models_eynollah/model_eynollah_ocr_cnnrnn__degraded_20250805/",
        help="slightly better at degraded Fraktur",
        dist_url=dist_url("ocr"),
        dists=['ocr'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="num_to_char",
        variant='',
        filename="characters_org.txt",
        dist_url=dist_url("ocr"),
        dists=['ocr'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="characters",
        variant='',
        filename="characters_org.txt",
        dist_url=dist_url("ocr"),
        dists=['ocr'],
        type=list,
    ),
    EynollahModelSpec(
        category="ocr",
        variant='tr',
        filename="models_eynollah/model_eynollah_ocr_trocr_20250919",
        dist_url=dist_url("trocr"),
        help='much slower transformer-based',
        dists=['trocr'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="trocr_processor",
        variant='',
        filename="models_eynollah/microsoft/trocr-base-printed",
        dist_url=dist_url("trocr"),
        dists=['trocr'],
        type=KerasModel,
    ),
    EynollahModelSpec(
        category="trocr_processor",
        variant='htr',
        filename="models_eynollah/microsoft/trocr-base-handwritten",
        dist_url=dist_url("trocr"),
        dists=['trocr'],
        type=TrOCRProcessor,
    ),
])
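
For orientation, a minimal sketch of how these specs resolve to files and download URLs; the import path follows the package layout introduced in this commit, and the printed values follow directly from the entries above:

```python
from eynollah.model_zoo.default_specs import DEFAULT_MODEL_SPECS, dist_url

# Resolve the light textline model and see where it comes from.
spec = DEFAULT_MODEL_SPECS.get('textline', 'light')
print(spec.filename)    # models_eynollah/modelens_textline_0_1__2_4_16092024
print(spec.dist_url)    # https://zenodo.org/records/17295988/files/models_layout_v0_7_0.zip
print(dist_url('ocr'))  # https://zenodo.org/records/17295988/files/models_ocr_v0_7_0.zip
```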

View file

@ -0,0 +1,189 @@
import json
import logging
from copy import deepcopy
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type, Union
from keras.layers import StringLookup
from keras.models import Model as KerasModel
from keras.models import load_model
from tabulate import tabulate
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ..patch_encoder import PatchEncoder, Patches
from .specs import EynollahModelSpecSet
from .default_specs import DEFAULT_MODEL_SPECS
from .types import AnyModel, T
class EynollahModelZoo:
"""
Wrapper class that handles storage and loading of models for all eynollah runners.
"""
model_basedir: Path
specs: EynollahModelSpecSet
def __init__(
self,
basedir: str,
model_overrides: Optional[List[Tuple[str, str, str]]] = None,
) -> None:
self.model_basedir = Path(basedir)
self.logger = logging.getLogger('eynollah.model_zoo')
self.specs = deepcopy(DEFAULT_MODEL_SPECS)
if model_overrides:
self.override_models(*model_overrides)
self._loaded: Dict[str, AnyModel] = {}
def override_models(
self,
*model_overrides: Tuple[str, str, str],
):
"""
Override the default model versions
"""
for model_category, model_variant, model_filename in model_overrides:
spec = self.specs.get(model_category, model_variant)
self.logger.warning("Overriding filename for model spec %s to %s", spec, model_filename)
self.specs.get(model_category, model_variant).filename = model_filename
def model_path(
self,
model_category: str,
model_variant: str = '',
absolute: bool = True,
) -> Path:
"""
Translate model_{type,variant} tuple into an absolute (or relative) Path
"""
spec = self.specs.get(model_category, model_variant)
if spec.category in ('characters', 'num_to_char'):
return self.model_path('ocr') / spec.filename
if not Path(spec.filename).is_absolute() and absolute:
model_path = Path(self.model_basedir).joinpath(spec.filename)
else:
model_path = Path(spec.filename)
return model_path
def load_models(
self,
*all_load_args: Union[str, Tuple[str], Tuple[str, str], Tuple[str, str, str]],
) -> Dict:
"""
Load all models by calling load_model and return a dictionary mapping model_category to loaded model
"""
ret = {}
for load_args in all_load_args:
if isinstance(load_args, str):
ret[load_args] = self.load_model(load_args)
else:
ret[load_args[0]] = self.load_model(*load_args)
return ret
def load_model(
self,
model_category: str,
model_variant: str = '',
) -> AnyModel:
"""
Load any model
"""
        model_path = self.model_path(model_category, model_variant)
        if model_path.suffix == '.h5' and model_path.with_suffix('').exists():
            # prefer SavedModel over HDF5 format if it exists
            model_path = model_path.with_suffix('')
        if model_category == 'ocr':
            model = self._load_ocr_model(variant=model_variant)
        elif model_category == 'num_to_char':
            model = self._load_num_to_char()
        elif model_category == 'characters':
            model = self._load_characters()
        elif model_category == 'trocr_processor':
            return TrOCRProcessor.from_pretrained(self.model_path(...))
        else:
            try:
                model = load_model(model_path, compile=False)
            except Exception as e:
                self.logger.exception(e)
                model = load_model(
                    model_path, compile=False, custom_objects={"PatchEncoder": PatchEncoder, "Patches": Patches}
                )
        self._loaded[model_category] = model
        return model  # type: ignore

    def get(self, model_category: str, model_type: Optional[Type[T]] = None) -> T:
        if model_category not in self._loaded:
            raise ValueError(f'Model "{model_category}" not previously loaded with load_model(...)')
        ret = self._loaded[model_category]
        if model_type:
            assert isinstance(ret, model_type)
        return ret  # type: ignore # FIXME: convince typing that we're returning generic type

    def _load_ocr_model(self, variant: str) -> AnyModel:
        """
        Load OCR model
        """
        ocr_model_dir = self.model_path('ocr', variant)
        if variant == 'tr':
            return VisionEncoderDecoderModel.from_pretrained(ocr_model_dir)
        else:
            ocr_model = load_model(ocr_model_dir, compile=False)
            assert isinstance(ocr_model, KerasModel)
            return KerasModel(
                ocr_model.get_layer(name="image").input,  # type: ignore
                ocr_model.get_layer(name="dense2").output,  # type: ignore
            )

    def _load_characters(self) -> List[str]:
        """
        Load encoding for OCR
        """
        with open(self.model_path('num_to_char'), "r") as config_file:
            return json.load(config_file)

    def _load_num_to_char(self) -> StringLookup:
        """
        Load decoder for OCR
        """
        characters = self._load_characters()
        # Mapping characters to integers.
        char_to_num = StringLookup(vocabulary=characters, mask_token=None)
        # Mapping integers back to original characters.
        return StringLookup(vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True)

    def __str__(self):
        return tabulate(
            [
                [
                    spec.type.__name__,
                    spec.category,
                    spec.variant,
                    spec.help,
                    ', '.join(spec.dists),
                    f'Yes, at {self.model_path(spec.category, spec.variant)}'
                    if self.model_path(spec.category, spec.variant).exists()
                    else f'No, download {spec.dist_url}',
                    # self.model_path(spec.category, spec.variant),
                ]
                for spec in self.specs.specs
            ],
            headers=[
                'Type',
                'Category',
                'Variant',
                'Help',
                'Used in',
                'Installed',
            ],
            tablefmt='github',
        )

    def shutdown(self):
        """
        Ensure that loaded models are no longer referenced by ``self._loaded``
        """
        if hasattr(self, '_loaded') and getattr(self, '_loaded'):
            for needle in list(self._loaded):
                if self._loaded[needle]:
                    del self._loaded[needle]
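
A minimal usage sketch of the zoo API above (the model directory is a hypothetical example and assumes the distributions were unpacked there, e.g. via the Makefile `models` target):

```python
from keras.models import Model
from eynollah.model_zoo import EynollahModelZoo

zoo = EynollahModelZoo(basedir='/path/to/models_eynollah')  # hypothetical path
zoo.load_model('reading_order')        # load by category, default variant
ro = zoo.get('reading_order', Model)   # typed retrieval of the loaded model
print(zoo)                             # tabulated overview of specs and install state
zoo.shutdown()                         # drop references to loaded models
```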

View file

@@ -0,0 +1,55 @@
from dataclasses import dataclass
from typing import Dict, List, Set, Tuple, Type

from .types import AnyModel

@dataclass
class EynollahModelSpec():
    """
    Describing a single model abstractly.
    """
    category: str
    # Relative filename to the models_eynollah directory in the dists
    filename: str
    # basename of the ZIP files that should contain this model
    dists: List[str]
    # URL to the smallest model distribution containing this model (link to Zenodo)
    dist_url: str
    type: Type[AnyModel]
    variant: str = ''
    help: str = ''

class EynollahModelSpecSet():
    """
    List of all used models for eynollah.
    """
    specs: List[EynollahModelSpec]

    def __init__(self, specs: List[EynollahModelSpec]) -> None:
        self.specs = sorted(specs, key=lambda x: x.category + '0' + x.variant)
        self.categories: Set[str] = set([spec.category for spec in self.specs])
        self.variants: Dict[str, Set[str]] = {
            spec.category: set([x.variant for x in self.specs if x.category == spec.category])
            for spec in self.specs
        }
        self._index_category_variant: Dict[Tuple[str, str], EynollahModelSpec] = {
            (spec.category, spec.variant): spec
            for spec in self.specs
        }

    def asdict(self) -> Dict[str, Dict[str, str]]:
        return {
            spec.category: {
                spec.variant: spec.filename
            }
            for spec in self.specs
        }

    def get(self, category: str, variant: str) -> EynollahModelSpec:
        if category not in self.categories:
            raise ValueError(f"Unknown category '{category}', must be one of {self.categories}")
        if variant not in self.variants[category]:
            raise ValueError(f"Unknown variant {variant} for {category}. Known variants: {self.variants[category]}")
        return self._index_category_variant[(category, variant)]
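
To illustrate the lookup semantics, a toy spec set (all values are placeholders, not real models):

```python
from eynollah.model_zoo.specs import EynollahModelSpec, EynollahModelSpecSet

specs = EynollahModelSpecSet([
    EynollahModelSpec(category='binarization', filename='some_model',
                      dists=['binarization'], dist_url='https://example.org/bin.zip',
                      type=list),
])
print(specs.get('binarization', '').filename)  # 'some_model'
# specs.get('binarization', 'augment')  # would raise ValueError (unknown variant)
# specs.get('foo', '')                  # would raise ValueError (unknown category)
```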

View file

@@ -0,0 +1,6 @@
from typing import List, TypeVar, Union
from keras.models import Model as KerasModel
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

AnyModel = Union[VisionEncoderDecoderModel, TrOCRProcessor, KerasModel, List]
T = TypeVar('T')
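
The TypeVar `T` is what lets `EynollahModelZoo.get` return a statically narrowed type; a standalone illustration of the pattern (not eynollah code):

```python
from typing import Dict, Optional, Type, TypeVar

T = TypeVar('T')

class Registry:
    def __init__(self) -> None:
        self._loaded: Dict[str, object] = {'chars': ['a', 'b']}

    def get(self, key: str, model_type: Optional[Type[T]] = None) -> T:
        ret = self._loaded[key]
        if model_type:
            assert isinstance(ret, model_type)  # runtime check backing the static narrowing
        return ret  # type: ignore

print(Registry().get('chars', list))  # ['a', 'b'], typed as list
```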

View file

@@ -83,10 +83,10 @@
        },
        "resources": [
            {
-                "url": "https://zenodo.org/records/17194824/files/models_layout_v0_5_0.tar.gz?download=1",
-                "name": "models_layout_v0_5_0",
+                "url": "https://zenodo.org/records/17295988/files/models_layout_v0_6_0.tar.gz?download=1",
+                "name": "models_layout_v0_6_0",
                "type": "archive",
-                "path_in_archive": "models_layout_v0_5_0",
+                "path_in_archive": "models_layout_v0_6_0",
                "size": 3525684179,
                "description": "Models for layout detection, reading order detection, textline detection, page extraction, column classification, table detection, binarization, image enhancement",
                "version_range": ">= v0.5.0"

View file

@@ -0,0 +1,52 @@
from keras import layers
import tensorflow as tf

projection_dim = 64
patch_size = 1
num_patches = 21*21  #14*14#28*28#14*14#28*28

class PatchEncoder(layers.Layer):
    def __init__(self):
        super().__init__()
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(input_dim=num_patches, output_dim=projection_dim)

    def call(self, patch):
        positions = tf.range(start=0, limit=num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'num_patches': num_patches,
            'projection': self.projection,
            'position_embedding': self.position_embedding,
        })
        return config

class Patches(layers.Layer):
    def __init__(self, **kwargs):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'patch_size': self.patch_size,
        })
        return config
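
A quick shape check for the two layers, assuming the hard-coded num_patches = 21*21 with patch_size = 1, i.e. 21x21 inputs (module path per this commit):

```python
import tensorflow as tf
from eynollah.patch_encoder import PatchEncoder, Patches

images = tf.random.uniform((2, 21, 21, 3))  # batch of 2 matches num_patches = 441
patches = Patches()(images)                 # -> (2, 441, 3): one 1x1 patch per pixel
encoded = PatchEncoder()(patches)           # -> (2, 441, 64): projection + position embedding
print(encoded.shape)
```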

View file

@@ -40,8 +40,8 @@ class EynollahPlotter:
        self.image_filename_stem = image_filename_stem
        # XXX TODO hacky these cannot be set at init time
        self.image_org = image_org
-        self.scale_x = scale_x
-        self.scale_y = scale_y
+        self.scale_x : float = scale_x
+        self.scale_y : float = scale_y

    def save_plot_of_layout_main(self, text_regions_p, image_page):
        if self.dir_of_layout is not None:

View file

@@ -2,18 +2,19 @@
Tool to load model and binarize a given image.
"""
-import sys
-from glob import glob
import os
import logging
+from pathlib import Path
+from typing import Dict, List
+from keras.models import Model
import numpy as np
-from PIL import Image
import cv2
from ocrd_utils import tf_disable_interactive_logs
+from eynollah.model_zoo import EynollahModelZoo

tf_disable_interactive_logs()
import tensorflow as tf
-from tensorflow.keras.models import load_model
from tensorflow.python.keras import backend as tensorflow_backend

from .utils import is_image_filename
@@ -23,40 +24,37 @@ def resize_image(img_in, input_height, input_width):

class SbbBinarizer:

-    def __init__(self, model_dir, logger=None):
-        self.model_dir = model_dir
+    def __init__(self, model_dir: str, mode: str, logger=None):
+        if mode not in ('single', 'multi'):
+            raise ValueError(f"'mode' must be either 'multi' or 'single', not {mode}")
        self.log = logger if logger else logging.getLogger('SbbBinarizer')
-        self.start_new_session()
-        self.model_files = glob(self.model_dir+"/*/", recursive = True)
-        self.models = []
-        for model_file in self.model_files:
-            self.models.append(self.load_model(model_file))
+        self.model_zoo = EynollahModelZoo(basedir=model_dir)
+        self.models = self.setup_models(mode)
+        self.session = self.start_new_session()

    def start_new_session(self):
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
-        self.session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
-        tensorflow_backend.set_session(self.session)
+        session = tf.compat.v1.Session(config=config) # tf.InteractiveSession()
+        tensorflow_backend.set_session(session)
+        return session

+    def setup_models(self, mode: str) -> Dict[Path, Model]:
+        return {
+            self.model_zoo.model_path(v): self.model_zoo.load_model(v)
+            for v in (['binarization'] if mode == 'single' else [f'binarization_multi_{i}' for i in range(1, 5)])
+        }

    def end_session(self):
        tensorflow_backend.clear_session()
        self.session.close()
        del self.session

-    def load_model(self, model_name):
-        model = load_model(os.path.join(self.model_dir, model_name), compile=False)
+    def predict(self, img, use_patches, n_batch_inference=5):
+        model = self.model_zoo.get('binarization', Model)
        model_height = model.layers[len(model.layers)-1].output_shape[1]
        model_width = model.layers[len(model.layers)-1].output_shape[2]
-        n_classes = model.layers[len(model.layers)-1].output_shape[3]
-        return model, model_height, model_width, n_classes
-
-    def predict(self, model_in, img, use_patches, n_batch_inference=5):
-        tensorflow_backend.set_session(self.session)
-        model, model_height, model_width, n_classes = model_in

        img_org_h = img.shape[0]
        img_org_w = img.shape[1]
@@ -324,8 +322,8 @@ class SbbBinarizer:
        if image_path is not None:
            image = cv2.imread(image_path)
        img_last = 0
-        for n, (model, model_file) in enumerate(zip(self.models, self.model_files)):
-            self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files)))
+        for n, (model_file, model) in enumerate(self.models.items()):
+            self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.models.keys())))
            res = self.predict(model, image, use_patches)
@@ -354,8 +352,8 @@ class SbbBinarizer:
            print(image_name,'image_name')
            image = cv2.imread(os.path.join(dir_in,image_name) )
            img_last = 0
-            for n, (model, model_file) in enumerate(zip(self.models, self.model_files)):
-                self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.model_files)))
+            for n, (model_file, model) in enumerate(self.models.items()):
+                self.log.info('Predicting with model %s [%s/%s]' % (model_file, n + 1, len(self.models.keys())))
                res = self.predict(model, image, use_patches)
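
A minimal sketch of the refactored constructor (the path is hypothetical; 'multi' loads the four binarization_multi_* models through the zoo instead of globbing the model directory):

```python
from eynollah.sbb_binarize import SbbBinarizer  # module path assumed from this diff

binarizer = SbbBinarizer(model_dir='/path/to/models_eynollah', mode='single')
# SbbBinarizer(model_dir='/path/to/models_eynollah', mode='both')  # raises ValueError
```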

View file

@@ -393,7 +393,12 @@ def find_num_col_deskew(regions_without_separators, sigma_, multiplier=3.8):
    z = gaussian_filter1d(regions_without_separators_0, sigma_)
    return np.std(z)

-def find_num_col(regions_without_separators, num_col_classifier, tables, multiplier=3.8):
+def find_num_col(
+        regions_without_separators,
+        num_col_classifier,
+        tables,
+        multiplier=3.8,
+):
    if not regions_without_separators.any():
        return 0, []
    #plt.imshow(regions_without_separators)

View file

@@ -2,7 +2,7 @@
# pylint: disable=import-error
from pathlib import Path
import os.path
-import xml.etree.ElementTree as ET
+from typing import Optional

from .utils.xml import create_page_xml, xml_reading_order
from .utils.counter import EynollahIdCounter
@@ -10,7 +10,6 @@ from ocrd_utils import getLogger
from ocrd_models.ocrd_page import (
    BorderType,
    CoordsType,
-    PcGtsType,
    TextLineType,
    TextEquivType,
    TextRegionType,
@@ -32,10 +31,10 @@ class EynollahXmlWriter:
        self.curved_line = curved_line
        self.textline_light = textline_light
        self.pcgts = pcgts
-        self.scale_x = None # XXX set outside __init__
-        self.scale_y = None # XXX set outside __init__
-        self.height_org = None # XXX set outside __init__
-        self.width_org = None # XXX set outside __init__
+        self.scale_x: Optional[float] = None # XXX set outside __init__
+        self.scale_y: Optional[float] = None # XXX set outside __init__
+        self.height_org: Optional[int] = None # XXX set outside __init__
+        self.width_org: Optional[int] = None # XXX set outside __init__

    @property
    def image_filename_stem(self):
@@ -135,6 +134,7 @@ class EynollahXmlWriter:
        # create the file structure
        pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org)
        page = pcgts.get_Page()
+        assert page
        page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_page_coords(cont_page))))
        counter = EynollahIdCounter()
@@ -152,6 +152,7 @@ class EynollahXmlWriter:
                Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord,
                                                                       skip_layout_reading_order))
            )
+            assert textregion.Coords
            if conf_contours_textregions:
                textregion.Coords.set_conf(conf_contours_textregions[mm])
            page.add_TextRegion(textregion)
@@ -168,6 +169,7 @@ class EynollahXmlWriter:
                id=counter.next_region_id, type_='heading',
                Coords=CoordsType(points=self.calculate_polygon_coords(region_contour, page_coord))
            )
+            assert textregion.Coords
            if conf_contours_textregions_h:
                textregion.Coords.set_conf(conf_contours_textregions_h[mm])
            page.add_TextRegion(textregion)

View file

@@ -16,10 +16,13 @@ from ocrd_models.constants import NAMESPACES as NS

testdir = Path(__file__).parent.resolve()

-MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_5_0').resolve()))
-MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_5_1').resolve()))
+MODELS_LAYOUT = environ.get('MODELS_LAYOUT', str(testdir.joinpath('..', 'models_layout_v0_6_0').resolve()))
+MODELS_OCR = environ.get('MODELS_OCR', str(testdir.joinpath('..', 'models_ocr_v0_6_0').resolve()))
MODELS_BIN = environ.get('MODELS_BIN', str(testdir.joinpath('..', 'default-2021-03-09').resolve()))

+def only_eynollah(logrec):
+    return logrec.name.startswith('eynollah')
+
@pytest.mark.parametrize(
    "options",
    [
@@ -50,8 +53,6 @@ def test_run_eynollah_layout_filename(tmp_path, pytestconfig, caplog, options):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'eynollah'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
@@ -85,8 +86,6 @@ def test_run_eynollah_layout_filename2(tmp_path, pytestconfig, caplog, options):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'eynollah'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(layout_cli, args + options, catch_exceptions=False)
@@ -116,8 +115,6 @@ def test_run_eynollah_layout_directory(tmp_path, pytestconfig, caplog):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'eynollah'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(layout_cli, args, catch_exceptions=False)
@@ -144,8 +141,6 @@ def test_run_eynollah_binarization_filename(tmp_path, pytestconfig, caplog, options):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'SbbBinarizer'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(binarization_cli, args + options, catch_exceptions=False)
@@ -170,8 +165,6 @@ def test_run_eynollah_binarization_directory(tmp_path, pytestconfig, caplog):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'SbbBinarizer'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(binarization_cli, args, catch_exceptions=False)
@@ -197,8 +190,6 @@ def test_run_eynollah_enhancement_filename(tmp_path, pytestconfig, caplog, options):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'enhancement'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(enhancement_cli, args + options, catch_exceptions=False)
@@ -223,8 +214,6 @@ def test_run_eynollah_enhancement_directory(tmp_path, pytestconfig, caplog):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'enhancement'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(enhancement_cli, args, catch_exceptions=False)
@@ -244,8 +233,6 @@ def test_run_eynollah_mbreorder_filename(tmp_path, pytestconfig, caplog):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'mbreorder'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(mbreorder_cli, args, catch_exceptions=False)
@@ -273,8 +260,6 @@ def test_run_eynollah_mbreorder_directory(tmp_path, pytestconfig, caplog):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'mbreorder'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(mbreorder_cli, args, catch_exceptions=False)
@@ -306,8 +291,6 @@ def test_run_eynollah_ocr_filename(tmp_path, pytestconfig, caplog, options):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.DEBUG)
-    def only_eynollah(logrec):
-        return logrec.name == 'eynollah'
    runner = CliRunner()
    if "-doit" in options:
        options.insert(options.index("-doit") + 1, str(outrenderfile.parent))
@@ -339,8 +322,6 @@ def test_run_eynollah_ocr_directory(tmp_path, pytestconfig, caplog):
    if pytestconfig.getoption('verbose') > 0:
        args.extend(['-l', 'DEBUG'])
    caplog.set_level(logging.INFO)
-    def only_eynollah(logrec):
-        return logrec.name == 'eynollah'
    runner = CliRunner()
    with caplog.filtering(only_eynollah):
        result = runner.invoke(ocr_cli, args, catch_exceptions=False)
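
The module-level filter widens matching from exact logger names to the shared prefix, so records from sub-loggers such as eynollah.mbreorder now pass; a small self-contained illustration:

```python
import logging

def only_eynollah(logrec):
    return logrec.name.startswith('eynollah')

rec = logging.LogRecord('eynollah.mbreorder', logging.INFO, __file__, 0, 'msg', None, None)
print(only_eynollah(rec))  # True; the old exact-name checks would have dropped this record
```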

View file

@@ -22,14 +22,14 @@ Download our pretrained weights and add them to a `train/pretrained_model` folder:

```sh
cd train
-wget -O pretrained_model.tar.gz https://zenodo.org/records/17243320/files/pretrained_model_v0_5_1.tar.gz?download=1
+wget -O pretrained_model.tar.gz "https://zenodo.org/records/17295988/files/pretrained_model_v0_6_0.tar.gz?download=1"
tar xf pretrained_model.tar.gz
```

### Binarization training data

A small sample of training data for binarization experiment can be found [on
-zenodo](https://zenodo.org/records/17243320/files/training_data_sample_binarization_v0_5_1.tar.gz?download=1),
+zenodo](https://zenodo.org/records/17295988/files/training_data_sample_binarization_v0_6_0.tar.gz?download=1),
which contains `images` and `labels` folders.

### Helpful tools