PEP 8 code style

pep-8-code-style
cneud 3 months ago
parent 593cf64693
commit b0a7f62ada

1
.gitignore vendored

@ -1,6 +1,5 @@
*.egg-info *.egg-info
__pycache__ __pycache__
sbb_newspapers_org_image/pylint.log
models_eynollah* models_eynollah*
output.html output.html
/build /build

@ -1,7 +1,7 @@
import sys
import click import click
import sys
from ocrd_utils import initLogging, setOverrideLogLevel from ocrd_utils import initLogging, setOverrideLogLevel
from src.eynollah.eynollah import Eynollah from .eynollah import Eynollah
@click.command() @click.command()

@ -1,40 +1,40 @@
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches # pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member # pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods, # pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods
# pylint: disable=consider-using-enumerate # pylint: disable=consider-using-enumerate
""" """
document layout analysis (segmentation) with output in PAGE-XML document layout analysis (segmentation) with output in PAGE-XML
""" """
import math import math
import os import os
import sys import sys
import time import time
import warnings import warnings
from pathlib import Path
from multiprocessing import Process, Queue, cpu_count from multiprocessing import Process, Queue, cpu_count
import gc from pathlib import Path
from ocrd_utils import getLogger
import cv2 import cv2
import numpy as np import numpy as np
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
stderr = sys.stderr
sys.stderr = open(os.devnull, "w")
import tensorflow as tf import tensorflow as tf
from tensorflow.python.keras import backend as K from ocrd_utils import getLogger
from tensorflow.keras.models import load_model
sys.stderr = stderr
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
from scipy.signal import find_peaks
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d from scipy.ndimage import gaussian_filter1d
# use tf1 compatibility for keras backend from scipy.signal import find_peaks
from tensorflow.compat.v1.keras.backend import set_session from tensorflow.python.keras import backend as K
from tensorflow.keras import layers from tensorflow.keras import layers
from tensorflow.keras.models import load_model
from .plot import EynollahPlotter
from .utils import (
boosting_headers_by_longshot_region_segmentation,
crop_image_inside_box,
find_num_col,
otsu_copy_binary,
put_drop_out_from_only_drop_model,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
check_any_text_region_in_model_one_is_main_or_header,
check_any_text_region_in_model_one_is_main_or_header_light,
small_textlines_to_parent_adherence2,
order_of_regions,
find_number_of_columns_in_document,
return_boxes_of_images_by_order_of_reading_new)
from .utils.contour import ( from .utils.contour import (
filter_contours_area_of_image, filter_contours_area_of_image,
filter_contours_area_of_image_tables, filter_contours_area_of_image_tables,
@ -50,6 +50,12 @@ from .utils.contour import (
return_contours_of_interested_textline, return_contours_of_interested_textline,
return_parent_contours, return_parent_contours,
) )
from .utils.drop_capitals import (
adhere_drop_capital_region_into_corresponding_textline,
filter_small_drop_capitals_from_no_patch_layout)
from .utils.marginals import get_marginals
from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.resize import resize_image
from .utils.rotate import ( from .utils.rotate import (
rotate_image, rotate_image,
rotation_not_90_func, rotation_not_90_func,
@ -58,29 +64,16 @@ from .utils.separate_lines import (
textline_contours_postprocessing, textline_contours_postprocessing,
separate_lines_new2, separate_lines_new2,
return_deskew_slop) return_deskew_slop)
from .utils.drop_capitals import (
adhere_drop_capital_region_into_corresponding_textline,
filter_small_drop_capitals_from_no_patch_layout)
from .utils.marginals import get_marginals
from .utils.resize import resize_image
from .utils import (
boosting_headers_by_longshot_region_segmentation,
crop_image_inside_box,
find_num_col,
otsu_copy_binary,
put_drop_out_from_only_drop_model,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
check_any_text_region_in_model_one_is_main_or_header,
check_any_text_region_in_model_one_is_main_or_header_light,
small_textlines_to_parent_adherence2,
order_of_regions,
find_number_of_columns_in_document,
return_boxes_of_images_by_order_of_reading_new)
from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.xml import order_and_id_of_texts from .utils.xml import order_and_id_of_texts
from .plot import EynollahPlotter
from .writer import EynollahXmlWriter from .writer import EynollahXmlWriter
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
stderr = sys.stderr
sys.stderr = open(os.devnull, "w")
sys.stderr = stderr
SLOPE_THRESHOLD = 0.13 SLOPE_THRESHOLD = 0.13
RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 # 98.45: RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 # 98.45:
DPI_THRESHOLD = 298 DPI_THRESHOLD = 298
@ -244,7 +237,7 @@ class Eynollah:
config = tf.compat.v1.ConfigProto() config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config) session = tf.compat.v1.Session(config=config)
set_session(session) K.set_session(session)
self.model_page = self.our_load_model(self.model_page_dir) self.model_page = self.our_load_model(self.model_page_dir)
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
@ -260,7 +253,7 @@ class Eynollah:
config = tf.compat.v1.ConfigProto() config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config) session = tf.compat.v1.Session(config=config)
set_session(session) K.set_session(session)
self.model_page = self.our_load_model(self.model_page_dir) self.model_page = self.our_load_model(self.model_page_dir)
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
@ -276,7 +269,7 @@ class Eynollah:
config = tf.compat.v1.ConfigProto() config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config) session = tf.compat.v1.Session(config=config)
set_session(session) K.set_session(session)
self.model_page = self.our_load_model(self.model_page_dir) self.model_page = self.our_load_model(self.model_page_dir)
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier) self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
@ -2566,6 +2559,7 @@ class Eynollah:
image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :] image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :]
else: else:
image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3))
for i in range(len(boxes)): for i in range(len(boxes)):
image_box = img_comm[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] image_box = img_comm[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :]
image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :] image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :]
@ -3345,11 +3339,9 @@ class Eynollah:
else: else:
pass pass
if self.light_version: if self.light_version:
txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first)
slope_first)
else: else:
txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first)
slope_first)
boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent) boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals) boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)

@ -1,6 +1,6 @@
from .processor import EynollahProcessor
from click import command from click import command
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
from .processor import EynollahProcessor
@command() @command()

@ -1,10 +1,9 @@
import matplotlib.pyplot as plt import cv2
import matplotlib.patches as mpatches import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np import numpy as np
import os.path import os.path
import cv2
from scipy.ndimage import gaussian_filter1d from scipy.ndimage import gaussian_filter1d
from .utils import crop_image_inside_box from .utils import crop_image_inside_box
from .utils.rotate import rotate_image_different from .utils.rotate import rotate_image_different
from .utils.resize import resize_image from .utils.resize import resize_image

@ -1,14 +1,7 @@
from json import loads from json import loads
from pkg_resources import resource_string
from tempfile import NamedTemporaryFile
from pathlib import Path
from os.path import join from os.path import join
from PIL import Image
from ocrd import Processor from ocrd import Processor
from ocrd_modelfactory import page_from_file, exif_from_filename from ocrd_modelfactory import page_from_file
from ocrd_models import OcrdFile, OcrdExif
from ocrd_models.ocrd_page import to_xml from ocrd_models.ocrd_page import to_xml
from ocrd_utils import ( from ocrd_utils import (
getLogger, getLogger,
@ -16,9 +9,8 @@ from ocrd_utils import (
assert_file_grp_cardinality, assert_file_grp_cardinality,
make_file_id make_file_id
) )
from pkg_resources import resource_string
from .eynollah import Eynollah from .eynollah import Eynollah
from .utils.pil_cv2 import pil2cv
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8')) OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))

@ -1,17 +1,18 @@
import cv2
import imutils
import math import math
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
from shapely import geometry
import cv2
import imutils
from scipy.signal import find_peaks from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d from scipy.ndimage import gaussian_filter1d
from shapely import geometry
from .is_nan import isNaN from .is_nan import isNaN
from .contour import (contours_in_same_horizon, from .contour import (
contours_in_same_horizon,
find_new_features_of_contours, find_new_features_of_contours,
return_contours_of_image, return_contours_of_image,
return_parent_contours) return_parent_contours
)
def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff): def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
@ -1646,8 +1647,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
# regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3) # regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3)
try: try:
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0)
multiplier=7.0)
except: except:
num_col = 0 num_col = 0
peaks_neg_fin = [] peaks_neg_fin = []

@ -73,8 +73,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
# print(np.prod(thresh.shape[:2])) # print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area # Check that polygon has area greater than minimal area
# print(hierarchy[0][jv][3],hierarchy ) # print(hierarchy[0][jv][3],hierarchy )
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod( if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):
image.shape[:2]): # and hierarchy[0][jv][3]==-1 : # and hierarchy[0][jv][3]==-1 :
# print(c[0][0][1]) # print(c[0][0][1])
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32)) found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
return found_polygons_early return found_polygons_early

@ -1,5 +1,6 @@
import numpy as np
import cv2 import cv2
import numpy as np
from .contour import ( from .contour import (
find_new_features_of_contours, find_new_features_of_contours,
return_contours_of_image, return_contours_of_image,

@ -1,8 +1,7 @@
import numpy as np
import cv2 import cv2
import numpy as np
from scipy.signal import find_peaks from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d from scipy.ndimage import gaussian_filter1d
from .contour import find_new_features_of_contours, return_contours_of_interested_region from .contour import find_new_features_of_contours, return_contours_of_interested_region
from .resize import resize_image from .resize import resize_image
from .rotate import rotate_image from .rotate import rotate_image

@ -1,9 +1,13 @@
from PIL import Image from cv2 import (
COLOR_GRAY2BGR,
COLOR_RGB2BGR,
COLOR_BGR2RGB,
cvtColor,
imread
)
import numpy as np import numpy as np
from PIL import Image
from ocrd_models import OcrdExif from ocrd_models import OcrdExif
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, COLOR_BGR2RGB, cvtColor, imread
# from sbb_binarization
def cv2pil(img): def cv2pil(img):

@ -1,7 +1,6 @@
import math
import imutils
import cv2 import cv2
import imutils
import math
def rotatedRectWithMaxArea(w, h, angle): def rotatedRectWithMaxArea(w, h, angle):

@ -1,21 +1,18 @@
import numpy as np
import cv2 import cv2
from scipy.signal import find_peaks import numpy as np
from scipy.ndimage import gaussian_filter1d from scipy.ndimage import gaussian_filter1d
import os from scipy.signal import find_peaks
from . import (
from .rotate import rotate_image find_num_col_deskew,
isNaN,
)
from .contour import ( from .contour import (
return_parent_contours, return_parent_contours,
filter_contours_area_of_image_tables, filter_contours_area_of_image_tables,
return_contours_of_image, return_contours_of_image,
filter_contours_area_of_image filter_contours_area_of_image
) )
from .is_nan import isNaN from .rotate import rotate_image
from . import (
find_num_col_deskew,
isNaN,
)
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis): def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@ -477,7 +474,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
[int(x_max), int(y_max)], [int(x_max), int(y_max)],
[int(x_min), int(y_max)]])) [int(x_min), int(y_max)]]))
elif len(peaks) == 2: elif len(peaks) == 2:
dis_to_next = np.abs(peaks[1] - peaks[0]) dis_to_next = np.abs(peaks[1] - peaks[0])
for jj in range(len(peaks)): for jj in range(len(peaks)):

@ -1,9 +1,7 @@
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member # pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=invalid-name # pylint: disable=invalid-name
from .counter import EynollahIdCounter
import numpy as np import numpy as np
from datetime import datetime from datetime import datetime
from ocrd_models.ocrd_page import ( from ocrd_models.ocrd_page import (
CoordsType, CoordsType,
GlyphType, GlyphType,
@ -26,8 +24,9 @@ from ocrd_models.ocrd_page import (
UnorderedGroupIndexedType, UnorderedGroupIndexedType,
UnorderedGroupType, UnorderedGroupType,
WordType, WordType,
to_xml
to_xml) )
from .counter import EynollahIdCounter
def create_page_xml(imageFilename, height, width): def create_page_xml(imageFilename, height, width):

@ -1,12 +1,8 @@
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member # pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=import-error # pylint: disable=import-error
from pathlib import Path import numpy as np
import os.path import os.path
from pathlib import Path
from .utils.xml import create_page_xml, xml_reading_order
from .utils.counter import EynollahIdCounter
from ocrd_utils import getLogger
from ocrd_models.ocrd_page import ( from ocrd_models.ocrd_page import (
BorderType, BorderType,
CoordsType, CoordsType,
@ -18,7 +14,9 @@ from ocrd_models.ocrd_page import (
SeparatorRegionType, SeparatorRegionType,
to_xml to_xml
) )
import numpy as np from ocrd_utils import getLogger
from .utils.xml import create_page_xml, xml_reading_order
from .utils.counter import EynollahIdCounter
class EynollahXmlWriter(): class EynollahXmlWriter():

@ -1,12 +1,8 @@
# pylint: disable=unused-import # pylint: disable=unused-import
from os.path import dirname, realpath
from os import chdir
import sys import sys
import logging from os import chdir
import io from os.path import dirname, realpath
import collections from unittest import TestCase as VanillaTestCase, main as unittests_main
from unittest import TestCase as VanillaTestCase, skip, main as unittests_main
import pytest import pytest
from ocrd_utils import disableLogging, initLogging from ocrd_utils import disableLogging, initLogging

@ -1,7 +1,7 @@
import cv2 import cv2
from pathlib import Path from pathlib import Path
from src.eynollah.utils.pil_cv2 import check_dpi
from tests.base import main from tests.base import main
from src.eynollah.utils.pil_cv2 import check_dpi
def test_dpi(): def test_dpi():

@ -1,6 +1,6 @@
from pytest import main from pytest import main
from src.eynollah.utils.xml import create_page_xml
from ocrd_models.ocrd_page import to_xml from ocrd_models.ocrd_page import to_xml
from src.eynollah.utils.xml import create_page_xml
PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15' PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'

Loading…
Cancel
Save