mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-07-04 00:19:55 +02:00
pep 8 code style
This commit is contained in:
parent
593cf64693
commit
b0a7f62ada
18 changed files with 91 additions and 117 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,6 +1,5 @@
|
|||
*.egg-info
|
||||
__pycache__
|
||||
sbb_newspapers_org_image/pylint.log
|
||||
models_eynollah*
|
||||
output.html
|
||||
/build
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import sys
|
||||
import click
|
||||
import sys
|
||||
from ocrd_utils import initLogging, setOverrideLogLevel
|
||||
from src.eynollah.eynollah import Eynollah
|
||||
from .eynollah import Eynollah
|
||||
|
||||
|
||||
@click.command()
|
||||
|
|
|
@ -1,40 +1,40 @@
|
|||
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
|
||||
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
|
||||
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods,
|
||||
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods
|
||||
# pylint: disable=consider-using-enumerate
|
||||
"""
|
||||
document layout analysis (segmentation) with output in PAGE-XML
|
||||
"""
|
||||
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from multiprocessing import Process, Queue, cpu_count
|
||||
import gc
|
||||
from ocrd_utils import getLogger
|
||||
from pathlib import Path
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
stderr = sys.stderr
|
||||
sys.stderr = open(os.devnull, "w")
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.keras import backend as K
|
||||
from tensorflow.keras.models import load_model
|
||||
|
||||
sys.stderr = stderr
|
||||
tf.get_logger().setLevel("ERROR")
|
||||
warnings.filterwarnings("ignore")
|
||||
from scipy.signal import find_peaks
|
||||
import matplotlib.pyplot as plt
|
||||
from ocrd_utils import getLogger
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
# use tf1 compatibility for keras backend
|
||||
from tensorflow.compat.v1.keras.backend import set_session
|
||||
from scipy.signal import find_peaks
|
||||
from tensorflow.python.keras import backend as K
|
||||
from tensorflow.keras import layers
|
||||
|
||||
from tensorflow.keras.models import load_model
|
||||
from .plot import EynollahPlotter
|
||||
from .utils import (
|
||||
boosting_headers_by_longshot_region_segmentation,
|
||||
crop_image_inside_box,
|
||||
find_num_col,
|
||||
otsu_copy_binary,
|
||||
put_drop_out_from_only_drop_model,
|
||||
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
||||
check_any_text_region_in_model_one_is_main_or_header,
|
||||
check_any_text_region_in_model_one_is_main_or_header_light,
|
||||
small_textlines_to_parent_adherence2,
|
||||
order_of_regions,
|
||||
find_number_of_columns_in_document,
|
||||
return_boxes_of_images_by_order_of_reading_new)
|
||||
from .utils.contour import (
|
||||
filter_contours_area_of_image,
|
||||
filter_contours_area_of_image_tables,
|
||||
|
@ -50,6 +50,12 @@ from .utils.contour import (
|
|||
return_contours_of_interested_textline,
|
||||
return_parent_contours,
|
||||
)
|
||||
from .utils.drop_capitals import (
|
||||
adhere_drop_capital_region_into_corresponding_textline,
|
||||
filter_small_drop_capitals_from_no_patch_layout)
|
||||
from .utils.marginals import get_marginals
|
||||
from .utils.pil_cv2 import check_dpi, pil2cv
|
||||
from .utils.resize import resize_image
|
||||
from .utils.rotate import (
|
||||
rotate_image,
|
||||
rotation_not_90_func,
|
||||
|
@ -58,29 +64,16 @@ from .utils.separate_lines import (
|
|||
textline_contours_postprocessing,
|
||||
separate_lines_new2,
|
||||
return_deskew_slop)
|
||||
from .utils.drop_capitals import (
|
||||
adhere_drop_capital_region_into_corresponding_textline,
|
||||
filter_small_drop_capitals_from_no_patch_layout)
|
||||
from .utils.marginals import get_marginals
|
||||
from .utils.resize import resize_image
|
||||
from .utils import (
|
||||
boosting_headers_by_longshot_region_segmentation,
|
||||
crop_image_inside_box,
|
||||
find_num_col,
|
||||
otsu_copy_binary,
|
||||
put_drop_out_from_only_drop_model,
|
||||
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
||||
check_any_text_region_in_model_one_is_main_or_header,
|
||||
check_any_text_region_in_model_one_is_main_or_header_light,
|
||||
small_textlines_to_parent_adherence2,
|
||||
order_of_regions,
|
||||
find_number_of_columns_in_document,
|
||||
return_boxes_of_images_by_order_of_reading_new)
|
||||
from .utils.pil_cv2 import check_dpi, pil2cv
|
||||
from .utils.xml import order_and_id_of_texts
|
||||
from .plot import EynollahPlotter
|
||||
from .writer import EynollahXmlWriter
|
||||
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
tf.get_logger().setLevel("ERROR")
|
||||
warnings.filterwarnings("ignore")
|
||||
stderr = sys.stderr
|
||||
sys.stderr = open(os.devnull, "w")
|
||||
sys.stderr = stderr
|
||||
|
||||
SLOPE_THRESHOLD = 0.13
|
||||
RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 # 98.45:
|
||||
DPI_THRESHOLD = 298
|
||||
|
@ -244,7 +237,7 @@ class Eynollah:
|
|||
config = tf.compat.v1.ConfigProto()
|
||||
config.gpu_options.allow_growth = True
|
||||
session = tf.compat.v1.Session(config=config)
|
||||
set_session(session)
|
||||
K.set_session(session)
|
||||
|
||||
self.model_page = self.our_load_model(self.model_page_dir)
|
||||
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
|
||||
|
@ -260,7 +253,7 @@ class Eynollah:
|
|||
config = tf.compat.v1.ConfigProto()
|
||||
config.gpu_options.allow_growth = True
|
||||
session = tf.compat.v1.Session(config=config)
|
||||
set_session(session)
|
||||
K.set_session(session)
|
||||
|
||||
self.model_page = self.our_load_model(self.model_page_dir)
|
||||
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
|
||||
|
@ -276,7 +269,7 @@ class Eynollah:
|
|||
config = tf.compat.v1.ConfigProto()
|
||||
config.gpu_options.allow_growth = True
|
||||
session = tf.compat.v1.Session(config=config)
|
||||
set_session(session)
|
||||
K.set_session(session)
|
||||
|
||||
self.model_page = self.our_load_model(self.model_page_dir)
|
||||
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
|
||||
|
@ -2566,6 +2559,7 @@ class Eynollah:
|
|||
|
||||
image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :]
|
||||
else:
|
||||
image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3))
|
||||
for i in range(len(boxes)):
|
||||
image_box = img_comm[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :]
|
||||
image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :]
|
||||
|
@ -3345,11 +3339,9 @@ class Eynollah:
|
|||
else:
|
||||
pass
|
||||
if self.light_version:
|
||||
txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image,
|
||||
slope_first)
|
||||
txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first)
|
||||
else:
|
||||
txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image,
|
||||
slope_first)
|
||||
txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first)
|
||||
boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
|
||||
boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from .processor import EynollahProcessor
|
||||
from click import command
|
||||
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
|
||||
from .processor import EynollahProcessor
|
||||
|
||||
|
||||
@command()
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
import matplotlib.pyplot as plt
|
||||
import cv2
|
||||
import matplotlib.patches as mpatches
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import os.path
|
||||
import cv2
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
|
||||
from .utils import crop_image_inside_box
|
||||
from .utils.rotate import rotate_image_different
|
||||
from .utils.resize import resize_image
|
||||
|
|
|
@ -1,14 +1,7 @@
|
|||
from json import loads
|
||||
from pkg_resources import resource_string
|
||||
from tempfile import NamedTemporaryFile
|
||||
from pathlib import Path
|
||||
from os.path import join
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from ocrd import Processor
|
||||
from ocrd_modelfactory import page_from_file, exif_from_filename
|
||||
from ocrd_models import OcrdFile, OcrdExif
|
||||
from ocrd_modelfactory import page_from_file
|
||||
from ocrd_models.ocrd_page import to_xml
|
||||
from ocrd_utils import (
|
||||
getLogger,
|
||||
|
@ -16,9 +9,8 @@ from ocrd_utils import (
|
|||
assert_file_grp_cardinality,
|
||||
make_file_id
|
||||
)
|
||||
|
||||
from pkg_resources import resource_string
|
||||
from .eynollah import Eynollah
|
||||
from .utils.pil_cv2 import pil2cv
|
||||
|
||||
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))
|
||||
|
||||
|
|
|
@ -1,17 +1,18 @@
|
|||
import cv2
|
||||
import imutils
|
||||
import math
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from shapely import geometry
|
||||
import cv2
|
||||
import imutils
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
|
||||
from shapely import geometry
|
||||
from .is_nan import isNaN
|
||||
from .contour import (contours_in_same_horizon,
|
||||
from .contour import (
|
||||
contours_in_same_horizon,
|
||||
find_new_features_of_contours,
|
||||
return_contours_of_image,
|
||||
return_parent_contours)
|
||||
return_parent_contours
|
||||
)
|
||||
|
||||
|
||||
def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
|
||||
|
@ -1646,8 +1647,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
|
|||
# regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3)
|
||||
|
||||
try:
|
||||
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables,
|
||||
multiplier=7.0)
|
||||
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0)
|
||||
except:
|
||||
num_col = 0
|
||||
peaks_neg_fin = []
|
||||
|
|
|
@ -73,8 +73,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
|
|||
# print(np.prod(thresh.shape[:2]))
|
||||
# Check that polygon has area greater than minimal area
|
||||
# print(hierarchy[0][jv][3],hierarchy )
|
||||
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(
|
||||
image.shape[:2]): # and hierarchy[0][jv][3]==-1 :
|
||||
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):
|
||||
# and hierarchy[0][jv][3]==-1 :
|
||||
# print(c[0][0][1])
|
||||
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
|
||||
return found_polygons_early
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import numpy as np
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from .contour import (
|
||||
find_new_features_of_contours,
|
||||
return_contours_of_image,
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
import numpy as np
|
||||
import cv2
|
||||
import numpy as np
|
||||
from scipy.signal import find_peaks
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
|
||||
from .contour import find_new_features_of_contours, return_contours_of_interested_region
|
||||
from .resize import resize_image
|
||||
from .rotate import rotate_image
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
from PIL import Image
|
||||
from cv2 import (
|
||||
COLOR_GRAY2BGR,
|
||||
COLOR_RGB2BGR,
|
||||
COLOR_BGR2RGB,
|
||||
cvtColor,
|
||||
imread
|
||||
)
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from ocrd_models import OcrdExif
|
||||
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, COLOR_BGR2RGB, cvtColor, imread
|
||||
|
||||
# from sbb_binarization
|
||||
|
||||
|
||||
def cv2pil(img):
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import math
|
||||
|
||||
import imutils
|
||||
import cv2
|
||||
import imutils
|
||||
import math
|
||||
|
||||
|
||||
def rotatedRectWithMaxArea(w, h, angle):
|
||||
|
|
|
@ -1,21 +1,18 @@
|
|||
import numpy as np
|
||||
import cv2
|
||||
from scipy.signal import find_peaks
|
||||
import numpy as np
|
||||
from scipy.ndimage import gaussian_filter1d
|
||||
import os
|
||||
|
||||
from .rotate import rotate_image
|
||||
from scipy.signal import find_peaks
|
||||
from . import (
|
||||
find_num_col_deskew,
|
||||
isNaN,
|
||||
)
|
||||
from .contour import (
|
||||
return_parent_contours,
|
||||
filter_contours_area_of_image_tables,
|
||||
return_contours_of_image,
|
||||
filter_contours_area_of_image
|
||||
)
|
||||
from .is_nan import isNaN
|
||||
from . import (
|
||||
find_num_col_deskew,
|
||||
isNaN,
|
||||
)
|
||||
from .rotate import rotate_image
|
||||
|
||||
|
||||
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
|
||||
|
@ -477,7 +474,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
|
|||
[int(x_max), int(y_max)],
|
||||
[int(x_min), int(y_max)]]))
|
||||
|
||||
|
||||
elif len(peaks) == 2:
|
||||
dis_to_next = np.abs(peaks[1] - peaks[0])
|
||||
for jj in range(len(peaks)):
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
|
||||
# pylint: disable=invalid-name
|
||||
from .counter import EynollahIdCounter
|
||||
import numpy as np
|
||||
from datetime import datetime
|
||||
|
||||
from ocrd_models.ocrd_page import (
|
||||
CoordsType,
|
||||
GlyphType,
|
||||
|
@ -26,8 +24,9 @@ from ocrd_models.ocrd_page import (
|
|||
UnorderedGroupIndexedType,
|
||||
UnorderedGroupType,
|
||||
WordType,
|
||||
|
||||
to_xml)
|
||||
to_xml
|
||||
)
|
||||
from .counter import EynollahIdCounter
|
||||
|
||||
|
||||
def create_page_xml(imageFilename, height, width):
|
||||
|
|
|
@ -1,12 +1,8 @@
|
|||
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
|
||||
# pylint: disable=import-error
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import os.path
|
||||
|
||||
from .utils.xml import create_page_xml, xml_reading_order
|
||||
from .utils.counter import EynollahIdCounter
|
||||
|
||||
from ocrd_utils import getLogger
|
||||
from pathlib import Path
|
||||
from ocrd_models.ocrd_page import (
|
||||
BorderType,
|
||||
CoordsType,
|
||||
|
@ -18,7 +14,9 @@ from ocrd_models.ocrd_page import (
|
|||
SeparatorRegionType,
|
||||
to_xml
|
||||
)
|
||||
import numpy as np
|
||||
from ocrd_utils import getLogger
|
||||
from .utils.xml import create_page_xml, xml_reading_order
|
||||
from .utils.counter import EynollahIdCounter
|
||||
|
||||
|
||||
class EynollahXmlWriter():
|
||||
|
|
|
@ -1,12 +1,8 @@
|
|||
# pylint: disable=unused-import
|
||||
|
||||
from os.path import dirname, realpath
|
||||
from os import chdir
|
||||
import sys
|
||||
import logging
|
||||
import io
|
||||
import collections
|
||||
from unittest import TestCase as VanillaTestCase, skip, main as unittests_main
|
||||
from os import chdir
|
||||
from os.path import dirname, realpath
|
||||
from unittest import TestCase as VanillaTestCase, main as unittests_main
|
||||
import pytest
|
||||
from ocrd_utils import disableLogging, initLogging
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import cv2
|
||||
from pathlib import Path
|
||||
from src.eynollah.utils.pil_cv2 import check_dpi
|
||||
from tests.base import main
|
||||
from src.eynollah.utils.pil_cv2 import check_dpi
|
||||
|
||||
|
||||
def test_dpi():
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from pytest import main
|
||||
from src.eynollah.utils.xml import create_page_xml
|
||||
from ocrd_models.ocrd_page import to_xml
|
||||
from src.eynollah.utils.xml import create_page_xml
|
||||
|
||||
PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue