mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-11-10 06:34:11 +01:00
fix merge issues
This commit is contained in:
parent
de76eabc1d
commit
29c273685f
9 changed files with 46 additions and 30 deletions
|
|
@ -426,13 +426,11 @@ def layout(
|
||||||
threshold_art_class_layout,
|
threshold_art_class_layout,
|
||||||
skip_layout_and_reading_order,
|
skip_layout_and_reading_order,
|
||||||
ignore_page_extraction,
|
ignore_page_extraction,
|
||||||
log_level,
|
|
||||||
setup_logging,
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Detect Layout (with optional image enhancement and reading order detection)
|
Detect Layout (with optional image enhancement and reading order detection)
|
||||||
"""
|
"""
|
||||||
from eynollah.eynollah import Eynollah
|
from .eynollah import Eynollah
|
||||||
assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
|
assert enable_plotting or not save_layout, "Plotting with -sl also requires -ep"
|
||||||
assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep"
|
assert enable_plotting or not save_deskewed, "Plotting with -sd also requires -ep"
|
||||||
assert enable_plotting or not save_all, "Plotting with -sa also requires -ep"
|
assert enable_plotting or not save_all, "Plotting with -sa also requires -ep"
|
||||||
|
|
@ -452,7 +450,6 @@ def layout(
|
||||||
assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l"
|
assert not extract_only_images or not right2left, "Image extraction -eoi can not be set alongside right2left -r2l"
|
||||||
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
|
assert not extract_only_images or not headers_off, "Image extraction -eoi can not be set alongside headers_off -ho"
|
||||||
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
assert bool(image) != bool(dir_in), "Either -i (single input) or -di (directory) must be provided, but not both."
|
||||||
from .eynollah import Eynollah
|
|
||||||
eynollah = Eynollah(
|
eynollah = Eynollah(
|
||||||
model_zoo=ctx.obj.model_zoo,
|
model_zoo=ctx.obj.model_zoo,
|
||||||
extract_only_images=extract_only_images,
|
extract_only_images=extract_only_images,
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,22 @@
|
||||||
|
"""
|
||||||
|
document layout analysis (segmentation) with output in PAGE-XML
|
||||||
|
"""
|
||||||
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
|
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
|
||||||
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
|
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
|
||||||
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods,
|
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods,
|
||||||
# pylint: disable=consider-using-enumerate
|
# pylint: disable=consider-using-enumerate
|
||||||
|
# FIXME: fix all of those...
|
||||||
# pyright: reportUnnecessaryTypeIgnoreComment=true
|
# pyright: reportUnnecessaryTypeIgnoreComment=true
|
||||||
# pyright: reportPossiblyUnboundVariable=false
|
# pyright: reportPossiblyUnboundVariable=false
|
||||||
"""
|
# pyright: reportMissingImports=false
|
||||||
document layout analysis (segmentation) with output in PAGE-XML
|
# pyright: reportCallIssue=false
|
||||||
"""
|
# pyright: reportOperatorIssue=false
|
||||||
|
# pyright: reportUnboundVariable=false
|
||||||
|
# pyright: reportArgumentType=false
|
||||||
|
# pyright: reportAttributeAccessIssue=false
|
||||||
|
# pyright: reportOptionalMemberAccess=false
|
||||||
|
# pyright: reportGeneralTypeIssues=false
|
||||||
|
# pyright: reportOptionalSubscript=false
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
@ -21,8 +31,7 @@ from difflib import SequenceMatcher as sq
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from typing import List, Optional, Tuple
|
from typing import Optional
|
||||||
import warnings
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from multiprocessing import cpu_count
|
from multiprocessing import cpu_count
|
||||||
|
|
@ -39,17 +48,8 @@ from skimage.morphology import skeletonize
|
||||||
from ocrd_utils import tf_disable_interactive_logs
|
from ocrd_utils import tf_disable_interactive_logs
|
||||||
import statistics
|
import statistics
|
||||||
|
|
||||||
try:
|
|
||||||
import torch # type: ignore
|
|
||||||
except ImportError:
|
|
||||||
torch = None
|
|
||||||
try:
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
except ImportError:
|
|
||||||
plt = None
|
|
||||||
|
|
||||||
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
|
|
||||||
tf_disable_interactive_logs()
|
tf_disable_interactive_logs()
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
# warnings.filterwarnings("ignore")
|
# warnings.filterwarnings("ignore")
|
||||||
from tensorflow.python.keras import backend as K
|
from tensorflow.python.keras import backend as K
|
||||||
|
|
@ -58,6 +58,14 @@ from tensorflow.keras.models import load_model
|
||||||
from tensorflow.compat.v1.keras.backend import set_session
|
from tensorflow.compat.v1.keras.backend import set_session
|
||||||
from tensorflow.keras import layers
|
from tensorflow.keras import layers
|
||||||
from tensorflow.keras.layers import StringLookup
|
from tensorflow.keras.layers import StringLookup
|
||||||
|
try:
|
||||||
|
import torch
|
||||||
|
except ImportError:
|
||||||
|
torch = None
|
||||||
|
try:
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
except ImportError:
|
||||||
|
plt = None
|
||||||
|
|
||||||
from .model_zoo import EynollahModelZoo
|
from .model_zoo import EynollahModelZoo
|
||||||
from .utils.contour import (
|
from .utils.contour import (
|
||||||
|
|
@ -3667,8 +3675,8 @@ class Eynollah:
|
||||||
def return_ocr_of_textline_without_common_section(
|
def return_ocr_of_textline_without_common_section(
|
||||||
self,
|
self,
|
||||||
textline_image,
|
textline_image,
|
||||||
model_ocr: KerasModel,
|
model_ocr,
|
||||||
processor: TrOCRProcessor,
|
processor,
|
||||||
device,
|
device,
|
||||||
width_textline,
|
width_textline,
|
||||||
h2w_ratio,
|
h2w_ratio,
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,11 @@
|
||||||
Image enhancer. The output can be written as same scale of input or in new predicted scale.
|
Image enhancer. The output can be written as same scale of input or in new predicted scale.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# FIXME: fix all of those...
|
||||||
|
# pyright: reportUnboundVariable=false
|
||||||
|
# pyright: reportCallIssue=false
|
||||||
|
# pyright: reportArgumentType=false
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,10 @@
|
||||||
Machine learning based reading order detection
|
Machine learning based reading order detection
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# pyright: reportCallIssue=false
|
||||||
|
# pyright: reportUnboundVariable=false
|
||||||
|
# pyright: reportArgumentType=false
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
from .specs import EynollahModelSpec, EynollahModelSpecSet
|
from .specs import EynollahModelSpec, EynollahModelSpecSet
|
||||||
from .types import KerasModel
|
|
||||||
|
|
||||||
# NOTE: This needs to change whenever models/versions change
|
# NOTE: This needs to change whenever models/versions change
|
||||||
ZENODO = "https://zenodo.org/records/17295988/files"
|
ZENODO = "https://zenodo.org/records/17295988/files"
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Dict, List, Set, Tuple, Type
|
from typing import Dict, List, Set, Tuple
|
||||||
from .types import AnyModel
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
from typing import List, TypeVar, Union
|
from typing import TypeVar
|
||||||
from keras.models import Model as KerasModel
|
|
||||||
|
|
||||||
# NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone
|
# NOTE: Creating an actual union type requires loading transformers which is expensive and error-prone
|
||||||
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
# from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,15 @@
|
||||||
Tool to load model and binarize a given image.
|
Tool to load model and binarize a given image.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# pyright: reportIndexIssue=false
|
||||||
|
# pyright: reportCallIssue=false
|
||||||
|
# pyright: reportArgumentType=false
|
||||||
|
# pyright: reportPossiblyUnboundVariable=false
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import cv2
|
import cv2
|
||||||
|
|
@ -326,7 +331,7 @@ class SbbBinarizer:
|
||||||
image = cv2.imread(image_path)
|
image = cv2.imread(image_path)
|
||||||
img_last = 0
|
img_last = 0
|
||||||
for n, (model_file, model) in enumerate(self.models.items()):
|
for n, (model_file, model) in enumerate(self.models.items()):
|
||||||
self.log.info('Predicting %s with model %s [%s/%s]', image_path if image_path else '[image]', model_file, n + 1, len(self.models.keys()))
|
self.logger.info('Predicting %s with model %s [%s/%s]', image_path if image_path else '[image]', model_file, n + 1, len(self.models.keys()))
|
||||||
res = self.predict(model, image, use_patches)
|
res = self.predict(model, image, use_patches)
|
||||||
|
|
||||||
img_fin = np.zeros((res.shape[0], res.shape[1], 3))
|
img_fin = np.zeros((res.shape[0], res.shape[1], 3))
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ def eynollah_subcommands():
|
||||||
'layout',
|
'layout',
|
||||||
'ocr',
|
'ocr',
|
||||||
'enhancement',
|
'enhancement',
|
||||||
'machine-based-reading-order'
|
'machine-based-reading-order',
|
||||||
'models'
|
'models',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue