PEP 8 code style

pep-8-code-style
cneud 4 months ago
parent 593cf64693
commit b0a7f62ada

1
.gitignore vendored

@ -1,6 +1,5 @@
*.egg-info
__pycache__
sbb_newspapers_org_image/pylint.log
models_eynollah*
output.html
/build

@ -1,7 +1,7 @@
import sys
import click
import sys
from ocrd_utils import initLogging, setOverrideLogLevel
from src.eynollah.eynollah import Eynollah
from .eynollah import Eynollah
@click.command()

@ -1,40 +1,40 @@
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring,missing-class-docstring,too-many-branches
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods,
# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods
# pylint: disable=consider-using-enumerate
"""
document layout analysis (segmentation) with output in PAGE-XML
"""
import math
import os
import sys
import time
import warnings
from pathlib import Path
from multiprocessing import Process, Queue, cpu_count
import gc
from ocrd_utils import getLogger
from pathlib import Path
import cv2
import numpy as np
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
stderr = sys.stderr
sys.stderr = open(os.devnull, "w")
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.keras.models import load_model
sys.stderr = stderr
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
from scipy.signal import find_peaks
import matplotlib.pyplot as plt
from ocrd_utils import getLogger
from scipy.ndimage import gaussian_filter1d
# use tf1 compatibility for keras backend
from tensorflow.compat.v1.keras.backend import set_session
from scipy.signal import find_peaks
from tensorflow.python.keras import backend as K
from tensorflow.keras import layers
from tensorflow.keras.models import load_model
from .plot import EynollahPlotter
from .utils import (
boosting_headers_by_longshot_region_segmentation,
crop_image_inside_box,
find_num_col,
otsu_copy_binary,
put_drop_out_from_only_drop_model,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
check_any_text_region_in_model_one_is_main_or_header,
check_any_text_region_in_model_one_is_main_or_header_light,
small_textlines_to_parent_adherence2,
order_of_regions,
find_number_of_columns_in_document,
return_boxes_of_images_by_order_of_reading_new)
from .utils.contour import (
filter_contours_area_of_image,
filter_contours_area_of_image_tables,
@ -50,6 +50,12 @@ from .utils.contour import (
return_contours_of_interested_textline,
return_parent_contours,
)
from .utils.drop_capitals import (
adhere_drop_capital_region_into_corresponding_textline,
filter_small_drop_capitals_from_no_patch_layout)
from .utils.marginals import get_marginals
from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.resize import resize_image
from .utils.rotate import (
rotate_image,
rotation_not_90_func,
@ -58,29 +64,16 @@ from .utils.separate_lines import (
textline_contours_postprocessing,
separate_lines_new2,
return_deskew_slop)
from .utils.drop_capitals import (
adhere_drop_capital_region_into_corresponding_textline,
filter_small_drop_capitals_from_no_patch_layout)
from .utils.marginals import get_marginals
from .utils.resize import resize_image
from .utils import (
boosting_headers_by_longshot_region_segmentation,
crop_image_inside_box,
find_num_col,
otsu_copy_binary,
put_drop_out_from_only_drop_model,
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
check_any_text_region_in_model_one_is_main_or_header,
check_any_text_region_in_model_one_is_main_or_header_light,
small_textlines_to_parent_adherence2,
order_of_regions,
find_number_of_columns_in_document,
return_boxes_of_images_by_order_of_reading_new)
from .utils.pil_cv2 import check_dpi, pil2cv
from .utils.xml import order_and_id_of_texts
from .plot import EynollahPlotter
from .writer import EynollahXmlWriter
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
stderr = sys.stderr
sys.stderr = open(os.devnull, "w")
sys.stderr = stderr
SLOPE_THRESHOLD = 0.13
RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 # 98.45:
DPI_THRESHOLD = 298
@ -244,7 +237,7 @@ class Eynollah:
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
set_session(session)
K.set_session(session)
self.model_page = self.our_load_model(self.model_page_dir)
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
@ -260,7 +253,7 @@ class Eynollah:
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
set_session(session)
K.set_session(session)
self.model_page = self.our_load_model(self.model_page_dir)
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
@ -276,7 +269,7 @@ class Eynollah:
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
set_session(session)
K.set_session(session)
self.model_page = self.our_load_model(self.model_page_dir)
self.model_classifier = self.our_load_model(self.model_dir_of_col_classifier)
@ -2566,6 +2559,7 @@ class Eynollah:
image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :]
else:
image_revised_last = np.zeros((image_regions_eraly_p.shape[0], image_regions_eraly_p.shape[1], 3))
for i in range(len(boxes)):
image_box = img_comm[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :]
image_revised_last[int(boxes[i][2]):int(boxes[i][3]), int(boxes[i][0]):int(boxes[i][1]), :] = image_box[:, :, :]
@ -3345,11 +3339,9 @@ class Eynollah:
else:
pass
if self.light_version:
txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image,
slope_first)
txt_con_org = get_textregion_contours_in_org_image_light(contours_only_text_parent, self.image, slope_first)
else:
txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image,
slope_first)
txt_con_org = get_textregion_contours_in_org_image(contours_only_text_parent, self.image, slope_first)
boxes_text, _ = get_text_region_boxes_by_given_contours(contours_only_text_parent)
boxes_marginals, _ = get_text_region_boxes_by_given_contours(polygons_of_marginals)

@ -1,6 +1,6 @@
from .processor import EynollahProcessor
from click import command
from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor
from .processor import EynollahProcessor
@command()

@ -1,10 +1,9 @@
import matplotlib.pyplot as plt
import cv2
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import os.path
import cv2
from scipy.ndimage import gaussian_filter1d
from .utils import crop_image_inside_box
from .utils.rotate import rotate_image_different
from .utils.resize import resize_image

@ -1,14 +1,7 @@
from json import loads
from pkg_resources import resource_string
from tempfile import NamedTemporaryFile
from pathlib import Path
from os.path import join
from PIL import Image
from ocrd import Processor
from ocrd_modelfactory import page_from_file, exif_from_filename
from ocrd_models import OcrdFile, OcrdExif
from ocrd_modelfactory import page_from_file
from ocrd_models.ocrd_page import to_xml
from ocrd_utils import (
getLogger,
@ -16,9 +9,8 @@ from ocrd_utils import (
assert_file_grp_cardinality,
make_file_id
)
from pkg_resources import resource_string
from .eynollah import Eynollah
from .utils.pil_cv2 import pil2cv
OCRD_TOOL = loads(resource_string(__name__, 'ocrd-tool.json').decode('utf8'))

@ -1,17 +1,18 @@
import cv2
import imutils
import math
import matplotlib.pyplot as plt
import numpy as np
from shapely import geometry
import cv2
import imutils
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from shapely import geometry
from .is_nan import isNaN
from .contour import (contours_in_same_horizon,
find_new_features_of_contours,
return_contours_of_image,
return_parent_contours)
from .contour import (
contours_in_same_horizon,
find_new_features_of_contours,
return_contours_of_image,
return_parent_contours
)
def return_x_start_end_mothers_childs_and_type_of_reading_order(x_min_hor_some, x_max_hor_some, cy_hor_some, peak_points, cy_hor_diff):
@ -1646,8 +1647,7 @@ def find_number_of_columns_in_document(region_pre_p, num_col_classifier, tables,
# regions_without_separators_tile=cv2.erode(regions_without_separators_tile,kernel,iterations = 3)
try:
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables,
multiplier=7.0)
num_col, peaks_neg_fin = find_num_col(regions_without_separators_tile, num_col_classifier, tables, multiplier=7.0)
except:
num_col = 0
peaks_neg_fin = []

@ -73,8 +73,8 @@ def filter_contours_area_of_image_tables(image, contours, hierarchy, max_area, m
# print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area
# print(hierarchy[0][jv][3],hierarchy )
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(
image.shape[:2]): # and hierarchy[0][jv][3]==-1 :
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):
# and hierarchy[0][jv][3]==-1 :
# print(c[0][0][1])
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
return found_polygons_early

@ -1,5 +1,6 @@
import numpy as np
import cv2
import numpy as np
from .contour import (
find_new_features_of_contours,
return_contours_of_image,

@ -1,8 +1,7 @@
import numpy as np
import cv2
import numpy as np
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from .contour import find_new_features_of_contours, return_contours_of_interested_region
from .resize import resize_image
from .rotate import rotate_image

@ -1,9 +1,13 @@
from PIL import Image
from cv2 import (
COLOR_GRAY2BGR,
COLOR_RGB2BGR,
COLOR_BGR2RGB,
cvtColor,
imread
)
import numpy as np
from PIL import Image
from ocrd_models import OcrdExif
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, COLOR_BGR2RGB, cvtColor, imread
# from sbb_binarization
def cv2pil(img):

@ -1,7 +1,6 @@
import math
import imutils
import cv2
import imutils
import math
def rotatedRectWithMaxArea(w, h, angle):

@ -1,21 +1,18 @@
import numpy as np
import cv2
from scipy.signal import find_peaks
import numpy as np
from scipy.ndimage import gaussian_filter1d
import os
from .rotate import rotate_image
from scipy.signal import find_peaks
from . import (
find_num_col_deskew,
isNaN,
)
from .contour import (
return_parent_contours,
filter_contours_area_of_image_tables,
return_contours_of_image,
filter_contours_area_of_image
)
from .is_nan import isNaN
from . import (
find_num_col_deskew,
isNaN,
)
from .rotate import rotate_image
def dedup_separate_lines(img_patch, contour_text_interest, thetha, axis):
@ -477,7 +474,6 @@ def separate_lines(img_patch, contour_text_interest, thetha, x_help, y_help):
[int(x_max), int(y_max)],
[int(x_min), int(y_max)]]))
elif len(peaks) == 2:
dis_to_next = np.abs(peaks[1] - peaks[0])
for jj in range(len(peaks)):

@ -1,9 +1,7 @@
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=invalid-name
from .counter import EynollahIdCounter
import numpy as np
from datetime import datetime
from ocrd_models.ocrd_page import (
CoordsType,
GlyphType,
@ -26,8 +24,9 @@ from ocrd_models.ocrd_page import (
UnorderedGroupIndexedType,
UnorderedGroupType,
WordType,
to_xml)
to_xml
)
from .counter import EynollahIdCounter
def create_page_xml(imageFilename, height, width):

@ -1,12 +1,8 @@
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
# pylint: disable=import-error
from pathlib import Path
import numpy as np
import os.path
from .utils.xml import create_page_xml, xml_reading_order
from .utils.counter import EynollahIdCounter
from ocrd_utils import getLogger
from pathlib import Path
from ocrd_models.ocrd_page import (
BorderType,
CoordsType,
@ -18,7 +14,9 @@ from ocrd_models.ocrd_page import (
SeparatorRegionType,
to_xml
)
import numpy as np
from ocrd_utils import getLogger
from .utils.xml import create_page_xml, xml_reading_order
from .utils.counter import EynollahIdCounter
class EynollahXmlWriter():

@ -1,12 +1,8 @@
# pylint: disable=unused-import
from os.path import dirname, realpath
from os import chdir
import sys
import logging
import io
import collections
from unittest import TestCase as VanillaTestCase, skip, main as unittests_main
from os import chdir
from os.path import dirname, realpath
from unittest import TestCase as VanillaTestCase, main as unittests_main
import pytest
from ocrd_utils import disableLogging, initLogging

@ -1,7 +1,7 @@
import cv2
from pathlib import Path
from src.eynollah.utils.pil_cv2 import check_dpi
from tests.base import main
from src.eynollah.utils.pil_cv2 import check_dpi
def test_dpi():

@ -1,6 +1,6 @@
from pytest import main
from src.eynollah.utils.xml import create_page_xml
from ocrd_models.ocrd_page import to_xml
from src.eynollah.utils.xml import create_page_xml
PAGE_2019 = 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15'

Loading…
Cancel
Save