mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-11-09 22:24:13 +01:00
fix merge issues
This commit is contained in:
parent
de76eabc1d
commit
29c273685f
9 changed files with 46 additions and 30 deletions
|
|
@ -426,13 +426,11 @@ def layout(
|
|||
threshold_art_class_layout,
|
||||
skip_layout_and_reading_order,
|
||||
ignore_page_extraction,
|
||||
log_level,
|
||||
setup_logging,
|
||||
):
|
||||
"""
|
||||
Detect Layout (with optional image enhancement and reading order detection)
|
||||
"""
|
||||
from eynollah.eynollah import Eynollah
|
||||
from .eynollah import Eynollah
|
||||
assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
|
||||
assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep"
|
||||
assert enable_plotting or not save_all, "Plotting with -sa also requires -ep"
|
||||
|
|
@ -452,7 +450,6 @@ def layout(
|
|||
assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l"
|
||||
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
|
||||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||
from .eynollah import Eynollah
|
||||
eynollah = Eynollah(
|
||||
model_zoo=ctx.obj.model_zoo,
|
||||
extract_only_images=extract_only_images,
|
||||
|
|
|
|||
|
|
@ -1,12 +1,22 @@
|
|||
"""
|
||||
document layout analysis (segmentation) with output in PAGE-XML
|
||||
"""
|
||||
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
|
||||
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
|
||||
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods,
|
||||
# pylint: disable=consider-using-enumerate
|
||||
# FIXME: fix all of those...
|
||||
# pyright: reportUnnecessaryTypeIgnoreComment=true
|
||||
# pyright: reportPossiblyUnboundVariable=false
|
||||
"""
|
||||
document layout analysis (segmentation) with output in PAGE-XML
|
||||
"""
|
||||
# pyright: reportMissingImports=false
|
||||
# pyright: reportCallIssue=false
|
||||
# pyright: reportOperatorIssue=false
|
||||
# pyright: reportUnboundVariable=false
|
||||
# pyright: reportArgumentType=false
|
||||
# pyright: reportAttributeAccessIssue=false
|
||||
# pyright: reportOptionalMemberAccess=false
|
||||
# pyright: reportGeneralTypeIssues=false
|
||||
# pyright: reportOptionalSubscript=false
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
|
@ -21,8 +31,7 @@ from difflib import SequenceMatcher as sq
|
|||
import math
|
||||
import os
|
||||
import time
|
||||
from typing import List, Optional, Tuple
|
||||
import warnings
|
||||
from typing import Optional
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
from multiprocessing import cpu_count
|
||||
|
|
@ -39,17 +48,8 @@ from skimage.morphology import skeletonize
|
|||
from ocrd_utils import tf_disable_interactive_logs
|
||||
import statistics
|
||||
|
||||
try:
|
||||
import torch # type: ignore
|
||||
except ImportError:
|
||||
torch = None
|
||||
try:
|
||||
import matplotlib.pyplot as plt
|
||||
except ImportError:
|
||||
plt = None
|
||||
|
||||
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
|
||||
tf_disable_interactive_logs()
|
||||
|
||||
import tensorflow as tf
|
||||
# warnings.filterwarnings("ignore")
|
||||
from tensorflow.python.keras import backend as K
|
||||
|
|
@ -58,6 +58,14 @@ from tensorflow.keras.models import load_model
|
|||
from tensorflow.compat.v1.keras.backend import set_session
|
||||
from tensorflow.keras import layers
|
||||
from tensorflow.keras.layers import StringLookup
|
||||
try:
|
||||
import torch
|
||||
except ImportError:
|
||||
torch = None
|
||||
try:
|
||||
import matplotlib.pyplot as plt
|
||||
except ImportError:
|
||||
plt = None
|
||||
|
||||
from .model_zoo import EynollahModelZoo
|
||||
from .utils.contour import (
|
||||
|
|
@ -3667,8 +3675,8 @@ class Eynollah:
|
|||
def return_ocr_of_textline_without_common_section(
|
||||
self,
|
||||
textline_image,
|
||||
model_ocr: KerasModel,
|
||||
processor: TrOCRProcessor,
|
||||
model_ocr,
|
||||
processor,
|
||||
device,
|
||||
width_textline,
|
||||
h2w_ratio,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,11 @@
|
|||
Image enhancer. The output can be written as same scale of input or in new predicted scale.
|
||||
"""
|
||||
|
||||
# FIXME: fix all of those...
|
||||
# pyright: reportUnboundVariable=false
|
||||
# pyright: reportCallIssue=false
|
||||
# pyright: reportArgumentType=false
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@
|
|||
Machine learning based reading order detection
|
||||
"""
|
||||
|
||||
# pyright: reportCallIssue=false
|
||||
# pyright: reportUnboundVariable=false
|
||||
# pyright: reportArgumentType=false
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from .specs import EynollahModelSpec, EynollahModelSpecSet
|
||||
from .types import KerasModel
|
||||
|
||||
# NOTE: This needs to change whenever models/versions change
|
||||
ZENODO = "https://zenodo.org/records/17295988/files"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Set, Tuple, Type
|
||||
from .types import AnyModel
|
||||
from typing import Dict, List, Set, Tuple
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from typing import List, TypeVar, Union
|
||||
from keras.models import Model as KerasModel
|
||||
from typing import TypeVar
|
||||
|
||||
# NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone
|
||||
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||
|
|
|
|||
|
|
@ -2,10 +2,15 @@
|
|||
Tool to load model and binarize a given image.
|
||||
"""
|
||||
|
||||
# pyright: reportIndexIssue=false
|
||||
# pyright: reportCallIssue=false
|
||||
# pyright: reportArgumentType=false
|
||||
# pyright: reportPossiblyUnboundVariable=false
|
||||
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Dict, Optional
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
|
@ -326,7 +331,7 @@ class SbbBinarizer:
|
|||
image = cv2.imread(image_path)
|
||||
img_last = 0
|
||||
for n, (model_file, model) in enumerate(self.models.items()):
|
||||
self.log.info('Predicting %s with model %s [%s/%s]', image_path if image_path else '[image]', model_file, n + 1, len(self.models.keys()))
|
||||
self.logger.info('Predicting %s with model %s [%s/%s]', image_path if image_path else '[image]', model_file, n + 1, len(self.models.keys()))
|
||||
res = self.predict(model, image, use_patches)
|
||||
|
||||
img_fin = np.zeros((res.shape[0], res.shape[1], 3))
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ def eynollah_subcommands():
|
|||
'layout',
|
||||
'ocr',
|
||||
'enhancement',
|
||||
'machine-based-reading-order'
|
||||
'models'
|
||||
'machine-based-reading-order',
|
||||
'models',
|
||||
]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue