fix merge issues

This commit is contained in:
kba 2025-10-29 19:52:28 +01:00
parent de76eabc1d
commit 29c273685f
9 changed files with 46 additions and 30 deletions

View file

@@ -426,13 +426,11 @@ def layout(
threshold_art_class_layout,
skip_layout_and_reading_order,
ignore_page_extraction,
log_level,
setup_logging,
):
"""
Detect Layout (with optional image enhancement and reading order detection)
"""
from eynollah.eynollah import Eynollah
from .eynollah import Eynollah
assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep"
assert enable_plotting or not save_all, "Plotting with -sa also requires -ep"
@@ -452,7 +450,6 @@ def layout(
assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l"
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
from .eynollah import Eynollah
eynollah = Eynollah(
model_zoo=ctx.obj.model_zoo,
extract_only_images=extract_only_images,

View file

@@ -1,12 +1,22 @@
"""
document layout analysis (segmentation) with output in PAGE-XML
"""
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods,
# pylint: disable=consider-using-enumerate
# FIXME: fix all of those...
# pyright: reportUnnecessaryTypeIgnoreComment=true
# pyright: reportPossiblyUnboundVariable=false
"""
document layout analysis (segmentation) with output in PAGE-XML
"""
# pyright: reportMissingImports=false
# pyright: reportCallIssue=false
# pyright: reportOperatorIssue=false
# pyright: reportUnboundVariable=false
# pyright: reportArgumentType=false
# pyright: reportAttributeAccessIssue=false
# pyright: reportOptionalMemberAccess=false
# pyright: reportGeneralTypeIssues=false
# pyright: reportOptionalSubscript=false
import logging
import sys
@@ -21,8 +31,7 @@ from difflib import SequenceMatcher as sq
import math
import os
import time
from typing import List, Optional, Tuple
import warnings
from typing import Optional
from functools import partial
from pathlib import Path
from multiprocessing import cpu_count
@@ -39,17 +48,8 @@ from skimage.morphology import skeletonize
from ocrd_utils import tf_disable_interactive_logs
import statistics
try:
import torch # type: ignore
except ImportError:
torch = None
try:
import matplotlib.pyplot as plt
except ImportError:
plt = None
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
tf_disable_interactive_logs()
import tensorflow as tf
# warnings.filterwarnings("ignore")
from tensorflow.python.keras import backend as K
@@ -58,6 +58,14 @@ from tensorflow.keras.models import load_model
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.keras import layers
from tensorflow.keras.layers import StringLookup
try:
import torch
except ImportError:
torch = None
try:
import matplotlib.pyplot as plt
except ImportError:
plt = None
from .model_zoo import EynollahModelZoo
from .utils.contour import (
@@ -3667,8 +3675,8 @@ class Eynollah:
def return_ocr_of_textline_without_common_section(
self,
textline_image,
model_ocr: KerasModel,
processor: TrOCRProcessor,
model_ocr,
processor,
device,
width_textline,
h2w_ratio,

View file

@@ -2,6 +2,11 @@
Image enhancer. The output can be written as same scale of input or in new predicted scale.
"""
# FIXME: fix all of those...
# pyright: reportUnboundVariable=false
# pyright: reportCallIssue=false
# pyright: reportArgumentType=false
import logging
import os
import time

View file

@@ -2,6 +2,10 @@
Machine learning based reading order detection
"""
# pyright: reportCallIssue=false
# pyright: reportUnboundVariable=false
# pyright: reportArgumentType=false
import logging
import os
import time

View file

@@ -1,5 +1,4 @@
from .specs import EynollahModelSpec, EynollahModelSpecSet
from .types import KerasModel
# NOTE: This needs to change whenever models/versions change
ZENODO = "https://zenodo.org/records/17295988/files"

View file

@@ -1,6 +1,5 @@
from dataclasses import dataclass
from typing import Dict, List, Set, Tuple, Type
from .types import AnyModel
from typing import Dict, List, Set, Tuple
@dataclass

View file

@@ -1,5 +1,4 @@
from typing import List, TypeVar, Union
from keras.models import Model as KerasModel
from typing import TypeVar
# NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel

View file

@@ -2,10 +2,15 @@
Tool to load model and binarize a given image.
"""
# pyright: reportIndexIssue=false
# pyright: reportCallIssue=false
# pyright: reportArgumentType=false
# pyright: reportPossiblyUnboundVariable=false
import os
import logging
from pathlib import Path
from typing import Dict, List, Optional
from typing import Dict, Optional
import numpy as np
import cv2
@@ -326,7 +331,7 @@ class SbbBinarizer:
image = cv2.imread(image_path)
img_last = 0
for n, (model_file, model) in enumerate(self.models.items()):
self.log.info('Predicting %s with model %s [%s/%s]', image_path if image_path else '[image]', model_file, n + 1, len(self.models.keys()))
self.logger.info('Predicting %s with model %s [%s/%s]', image_path if image_path else '[image]', model_file, n + 1, len(self.models.keys()))
res = self.predict(model, image, use_patches)
img_fin = np.zeros((res.shape[0], res.shape[1], 3))

View file

@@ -31,7 +31,7 @@ def eynollah_subcommands():
'layout',
'ocr',
'enhancement',
'machine-based-reading-order'
'models'
'machine-based-reading-order',
'models',
]