Mirror of https://github.com/qurator-spk/eynollah.git (synced 2025-06-07 19:35:01 +02:00)

Commit 044ff0c5a2: Merge branch 'refactor' into main

19 changed files with 10709 additions and 11737 deletions
.circleci/config.yml (new file, 28 lines)
@@ -0,0 +1,28 @@
version: 2

jobs:

  build-python36:
    docker:
      - image: python:3.6
    steps:
      - checkout
      - restore_cache:
          keys:
            - model-cache
      - run: make models
      - save_cache:
          key: model-cache
          paths:
            models_eynollah.tar.gz
            models_eynollah
      - run: make install
      - run: make smoke-test

workflows:
  version: 2
  build:
    jobs:
      - build-python36
      #- build-python37
      #- build-python38 # no tensorflow for python 3.8
.gitignore (vendored, 3 additions)
@@ -1,2 +1,5 @@
*.egg-info
__pycache__
sbb_newspapers_org_image/pylint.log
models_eynollah*
output.html
Makefile (19 changes)
@@ -4,14 +4,26 @@ help:
	@echo ""
	@echo "  Targets"
	@echo ""
	@echo "  models       Download and extract models to $(PWD)/models_eynollah"
	@echo "  install      Install with pip"
	@echo "  install-dev  Install editable with pip"
	@echo "  test         Run unit tests"
	@echo ""
	@echo "  Variables"
	@echo ""

# END-EVAL

# Download and extract models to $(PWD)/models_eynollah
models: models_eynollah

models_eynollah: models_eynollah.tar.gz
	tar xf models_eynollah.tar.gz

models_eynollah.tar.gz:
	wget 'https://qurator-data.de/eynollah/models_eynollah.tar.gz'

# Install with pip
install:
	pip install .

@@ -19,3 +31,10 @@ install:
# Install editable with pip
install-dev:
	pip install -e .

smoke-test:
	eynollah -i tests/resources/kant_aufklaerung_1784_0020.tif -o . -m $(PWD)/models_eynollah

# Run unit tests
test:
	pytest tests
requirements-test.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
pytest
black
@@ -1,7 +1,7 @@
# ocrd includes opencv, numpy, shapely, click
ocrd >= 2.20.1
seaborn >= 0.11.0
keras >= 2.3.1
scikit-learn >= 0.23.2
tensorflow >= 1.15, < 2
tensorflow-gpu >= 1.15, < 2
imutils >= 0.5.3
matplotlib
sbb_newspapers_org_image/cli.py (new file, 107 lines)
@@ -0,0 +1,107 @@
import click
from sbb_newspapers_org_image.eynollah import eynollah


@click.command()
@click.option(
    "--image", "-i", help="image filename", type=click.Path(exists=True, dir_okay=False)
)
@click.option(
    "--out",
    "-o",
    help="directory to write output xml data",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--model",
    "-m",
    help="directory of models",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_images",
    "-si",
    help="if a directory is given, images in documents will be cropped and saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_layout",
    "-sl",
    help="if a directory is given, plot of layout will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_deskewed",
    "-sd",
    help="if a directory is given, deskewed image will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--save_all",
    "-sa",
    help="if a directory is given, all plots needed for documentation will be saved there",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--allow_enhancement",
    "-ae",
    is_flag=True,
    help="if set, check whether the input image needs resizing and enhancement; if so, the resized and enhanced image and the corresponding layout data are written to the output directory",
)
@click.option(
    "--curved_line",
    "-cl",
    is_flag=True,
    help="if set, return the contour of each textline instead of its rectangular bounding box; note that this option makes processing slower",
)
@click.option(
    "--full_layout",
    "-fl",
    is_flag=True,
    help="if set, return all elements of the layout",
)
@click.option(
    "--allow_scaling",
    "-as",
    is_flag=True,
    help="if set, check the scale of the input and, if needed, rescale it to improve layout detection",
)
@click.option(
    "--headers_off",
    "-ho",
    is_flag=True,
    help="if set, ignore the role of headers in the reading order",
)
def main(
    image,
    out,
    model,
    save_images,
    save_layout,
    save_deskewed,
    save_all,
    allow_enhancement,
    curved_line,
    full_layout,
    allow_scaling,
    headers_off,
):
    eynollah(
        image,
        None,
        out,
        model,
        save_images,
        save_layout,
        save_deskewed,
        save_all,
        allow_enhancement,
        curved_line,
        full_layout,
        allow_scaling,
        headers_off,
    ).run()


if __name__ == "__main__":
    main()
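For orientation, here is a minimal sketch (not part of the commit) of how this new CLI could be exercised with click's test runner; the image, output directory and model path below are assumptions borrowed from the Makefile smoke-test target.

from click.testing import CliRunner
from sbb_newspapers_org_image.cli import main

runner = CliRunner()
# Paths below are assumptions; adjust them to a real image and an extracted model directory.
result = runner.invoke(main, [
    "-i", "tests/resources/kant_aufklaerung_1784_0020.tif",
    "-o", ".",
    "-m", "models_eynollah",
])
print(result.exit_code)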
File diff suppressed because it is too large.

sbb_newspapers_org_image/unused.py (new file, 3029 lines): file diff suppressed because it is too large.

sbb_newspapers_org_image/utils/__init__.py (new file, 2091 lines): file diff suppressed because it is too large.
sbb_newspapers_org_image/utils/contour.py (new file, 298 lines)
@@ -0,0 +1,298 @@
import cv2
import numpy as np
from shapely import geometry

from .rotate import rotate_image, rotation_image_new


def contours_in_same_horizon(cy_main_hor):
    X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
    X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))

    X1[0::1, :] = cy_main_hor[:]
    X2 = X1.T

    X_dif = np.abs(X2 - X1)
    args_help = np.array(range(len(cy_main_hor)))
    all_args = []
    for i in range(len(cy_main_hor)):
        list_h = list(args_help[X_dif[i, :] <= 20])
        list_h.append(i)
        if len(list_h) > 1:
            all_args.append(list(set(list_h)))
    return np.unique(all_args)


def find_contours_mean_y_diff(contours_main):
    M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
    cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    return np.mean(np.diff(np.sort(np.array(cy_main))))


def find_features_of_contours(contours_main):
    areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
    M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
    cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
    x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])

    y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
    y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])

    return y_min_main, y_max_main, areas_main


def return_contours_of_interested_region_and_bounding_box(region_pre_p, pixel):
    # pixels of images are identified by 5
    cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
    cnts_images = cnts_images.astype(np.uint8)
    cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
    contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contours_imgs = return_parent_contours(contours_imgs, hiearchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=0.0003)

    boxes = []

    for jj in range(len(contours_imgs)):
        x, y, w, h = cv2.boundingRect(contours_imgs[jj])
        boxes.append([int(x), int(y), int(w), int(h)])
    return contours_imgs, boxes


def get_text_region_boxes_by_given_contours(contours):
    kernel = np.ones((5, 5), np.uint8)
    boxes = []
    contours_new = []
    for jj in range(len(contours)):
        x, y, w, h = cv2.boundingRect(contours[jj])

        boxes.append([x, y, w, h])
        contours_new.append(contours[jj])

    del contours
    return boxes, contours_new


def filter_contours_area_of_image(image, contours, hirarchy, max_area, min_area):
    found_polygons_early = list()

    jv = 0
    for c in contours:
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue

        polygon = geometry.Polygon([point[0] for point in c])
        area = polygon.area
        if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hirarchy[0][jv][3] == -1:
            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint))
        jv += 1
    return found_polygons_early


def filter_contours_area_of_image_interiors(image, contours, hirarchy, max_area, min_area):
    found_polygons_early = list()

    jv = 0
    for c in contours:
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue

        polygon = geometry.Polygon([point[0] for point in c])
        area = polygon.area
        if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hirarchy[0][jv][3] != -1:
            found_polygons_early.append(np.array([point for point in polygon.exterior.coords], dtype=np.uint))
        jv += 1
    return found_polygons_early


def filter_contours_area_of_image_tables(image, contours, hirarchy, max_area, min_area):
    found_polygons_early = list()

    jv = 0
    for c in contours:
        if len(c) < 3:  # A polygon cannot have less than 3 points
            continue

        polygon = geometry.Polygon([point[0] for point in c])
        area = polygon.area
        # Check that the polygon has an area greater than the minimal area
        if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]):
            found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
        jv += 1
    return found_polygons_early


def find_new_features_of_contoures(contours_main):
    areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
    M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
    cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
    try:
        x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])

        argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])

        x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] for j in range(len(contours_main))])
        y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] for j in range(len(contours_main))])

        x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])

        y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
        y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
    except:
        x_min_main = np.array([np.min(contours_main[j][:, 0]) for j in range(len(contours_main))])

        argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) for j in range(len(contours_main))])

        x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] for j in range(len(contours_main))])
        y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] for j in range(len(contours_main))])

        x_max_main = np.array([np.max(contours_main[j][:, 0]) for j in range(len(contours_main))])

        y_min_main = np.array([np.min(contours_main[j][:, 1]) for j in range(len(contours_main))])
        y_max_main = np.array([np.max(contours_main[j][:, 1]) for j in range(len(contours_main))])

    return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin


def return_parent_contours(contours, hierarchy):
    contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1]
    return contours_parent


def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
    # pixels of images are identified by 5
    if len(region_pre_p.shape) == 3:
        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
    else:
        cnts_images = (region_pre_p[:, :] == pixel) * 1
    cnts_images = cnts_images.astype(np.uint8)
    cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

    contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contours_imgs = return_parent_contours(contours_imgs, hiearchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=min_area)

    return contours_imgs


def get_textregion_contours_in_org_image(cnts, img, slope_first):
    cnts_org = []
    for i in range(len(cnts)):
        img_copy = np.zeros(img.shape)
        img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1))

        img_copy = rotation_image_new(img_copy, -slope_first)

        img_copy = img_copy.astype(np.uint8)
        imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
        ret, thresh = cv2.threshold(imgray, 0, 255, 0)

        cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
        cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
        cnts_org.append(cont_int[0])

    return cnts_org


def return_contours_of_interested_textline(region_pre_p, pixel):
    # pixels of images are identified by 5
    if len(region_pre_p.shape) == 3:
        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
    else:
        cnts_images = (region_pre_p[:, :] == pixel) * 1
    cnts_images = cnts_images.astype(np.uint8)
    cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
    contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contours_imgs = return_parent_contours(contours_imgs, hiearchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=0.000000003)
    return contours_imgs


def return_bonding_box_of_contours(cnts):
    boxes_tot = []
    for i in range(len(cnts)):
        x, y, w, h = cv2.boundingRect(cnts[i])

        box = [x, y, w, h]
        boxes_tot.append(box)
    return boxes_tot


def return_contours_of_image(image):
    if len(image.shape) == 2:
        image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
        image = image.astype(np.uint8)
    else:
        image = image.astype(np.uint8)
    imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
    contours, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    return contours, hierachy


def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
    # pixels of images are identified by 5
    if len(region_pre_p.shape) == 3:
        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
    else:
        cnts_images = (region_pre_p[:, :] == pixel) * 1
    cnts_images = cnts_images.astype(np.uint8)
    cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

    contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contours_imgs = return_parent_contours(contours_imgs, hiearchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=min_size)

    return contours_imgs


def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
    # pixels of images are identified by 5
    if len(region_pre_p.shape) == 3:
        cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
    else:
        cnts_images = (region_pre_p[:, :] == pixel) * 1
    cnts_images = cnts_images.astype(np.uint8)
    cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
    imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(imgray, 0, 255, 0)
    contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    contours_imgs = return_parent_contours(contours_imgs, hiearchy)
    contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=max_area, min_area=min_area)

    img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3))
    img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1))
    return img_ret[:, :, 0]
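As a quick illustration (not part of the commit), a minimal sketch of how the region-contour helper above might be used on a synthetic label map; the array shape and the class value 5 are assumptions.

import numpy as np
from sbb_newspapers_org_image.utils.contour import return_contours_of_interested_region

# Synthetic label map: one rectangular region marked with class value 5 in channel 0.
label_map = np.zeros((200, 300, 3), dtype=np.uint8)
label_map[50:150, 80:220, 0] = 5
contours = return_contours_of_interested_region(label_map, pixel=5, min_area=0.0001)
print(len(contours))  # expected: 1 parent contour for the rectangle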
sbb_newspapers_org_image/utils/drop_capitals.py (new file, 501 lines)
@@ -0,0 +1,501 @@
import numpy as np
import cv2
from .contour import (
    find_new_features_of_contoures,
    return_contours_of_image,
    return_parent_contours,
)


def adhere_drop_capital_region_into_cprresponding_textline(
    text_regions_p,
    polygons_of_drop_capitals,
    contours_only_text_parent,
    contours_only_text_parent_h,
    all_box_coord,
    all_box_coord_h,
    all_found_texline_polygons,
    all_found_texline_polygons_h,
    kernel=None,
    curved_line=False,
):
    cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent)
    cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent_h)
    cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contoures(polygons_of_drop_capitals)

    img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
    for j_cont in range(len(contours_only_text_parent)):
        img_con_all[all_box_coord[j_cont][0] : all_box_coord[j_cont][1], all_box_coord[j_cont][2] : all_box_coord[j_cont][3], 0] = (j_cont + 1) * 3

    for i_drop in range(len(polygons_of_drop_capitals)):
        img_con_all_copy = np.copy(img_con_all)
        img_con = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
        img_con = cv2.fillPoly(img_con, pts=[polygons_of_drop_capitals[i_drop]], color=(1, 1, 1))

        img_con_all_copy[:, :, 0] = img_con_all_copy[:, :, 0] + img_con[:, :, 0]
        img_con_all_copy[:, :, 0][img_con_all_copy[:, :, 0] == 1] = 0

        kherej_ghesmat = np.unique(img_con_all_copy[:, :, 0]) / 3
        res_summed_pixels = np.unique(img_con_all_copy[:, :, 0]) % 3
        region_with_intersected_drop = kherej_ghesmat[res_summed_pixels == 1]
        region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8)

        if len(region_with_intersected_drop) == 0:
            img_con_all_copy = np.copy(img_con_all)
            img_con = cv2.dilate(img_con, kernel, iterations=4)

            img_con_all_copy[:, :, 0] = img_con_all_copy[:, :, 0] + img_con[:, :, 0]
            img_con_all_copy[:, :, 0][img_con_all_copy[:, :, 0] == 1] = 0

            kherej_ghesmat = np.unique(img_con_all_copy[:, :, 0]) / 3
            res_summed_pixels = np.unique(img_con_all_copy[:, :, 0]) % 3
            region_with_intersected_drop = kherej_ghesmat[res_summed_pixels == 1]
            region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8)

        if curved_line:
            if len(region_with_intersected_drop) > 1:
                sum_pixels_of_intersection = []
                for i in range(len(region_with_intersected_drop)):
                    sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum())
                region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1

                try:
                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
                    y_lines = np.array(cy_t)
                    y_lines[y_lines < y_min_d[i_drop]] = 0

                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))

                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
                    cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0]
                    cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1]

                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))

                    img_textlines = img_textlines.astype(np.uint8)
                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])

                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]

                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
                except:
                    pass
            elif len(region_with_intersected_drop) == 1:
                region_final = region_with_intersected_drop[0] - 1

                cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
                y_lines = np.array(cy_t)
                y_lines[y_lines < y_min_d[i_drop]] = 0

                arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))

                cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
                cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0]
                cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1]

                img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
                img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
                img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))

                img_textlines = img_textlines.astype(np.uint8)

                imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
                ret, thresh = cv2.threshold(imgray, 0, 255, 0)

                contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])

                contours_biggest = contours_combined[np.argmax(areas_cnt_text)]

                all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest

                try:
                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
                    y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
                    y_lines[y_lines < y_min_d[i_drop]] = 0

                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))

                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
                    cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0]
                    cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1]

                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))

                    img_textlines = img_textlines.astype(np.uint8)
                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])

                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]

                    contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0]  # -all_box_coord[int(region_final)][2]
                    contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1]  # -all_box_coord[int(region_final)][0]

                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
                except:
                    pass
            else:
                pass
        else:
            if len(region_with_intersected_drop) > 1:
                sum_pixels_of_intersection = []
                for i in range(len(region_with_intersected_drop)):
                    sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum())
                region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1

                try:
                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
                    y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
                    y_lines[y_lines < y_min_d[i_drop]] = 0

                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))

                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
                    cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
                    cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]

                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))

                    img_textlines = img_textlines.astype(np.uint8)
                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])

                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]

                    contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2]
                    contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]

                    contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])

                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
                except:
                    pass
            elif len(region_with_intersected_drop) == 1:
                region_final = region_with_intersected_drop[0] - 1

                try:
                    cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
                    y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
                    y_lines[y_lines < y_min_d[i_drop]] = 0

                    arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))

                    cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
                    cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
                    cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]

                    img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
                    img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))

                    img_textlines = img_textlines.astype(np.uint8)
                    imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
                    ret, thresh = cv2.threshold(imgray, 0, 255, 0)

                    contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

                    areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])

                    contours_biggest = contours_combined[np.argmax(areas_cnt_text)]

                    contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2]
                    contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]

                    contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
                    all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
                except:
                    pass
            else:
                pass

    return all_found_texline_polygons


def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
    drop_only = (layout_no_patch[:, :, 0] == 4) * 1
    contours_drop, hir_on_drop = return_contours_of_image(drop_only)
    contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)

    areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))])
    areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1])

    contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.001]

    areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.001]

    contours_drop_parent_final = []

    for jj in range(len(contours_drop_parent)):
        x, y, w, h = cv2.boundingRect(contours_drop_parent[jj])

        iou_of_box_and_contoure = float(drop_only.shape[0] * drop_only.shape[1]) * areas_cnt_text[jj] / float(w * h) * 100
        height_to_weight_ratio = h / float(w)
        weigh_to_height_ratio = w / float(h)

        if iou_of_box_and_contoure > 60 and weigh_to_height_ratio < 1.2 and height_to_weight_ratio < 2:
            map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1]))
            map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w]

            if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15:
                contours_drop_parent_final.append(contours_drop_parent[jj])

    layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0

    layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4))

    return layout_no_patch
sbb_newspapers_org_image/utils/is_nan.py (new file, 3 lines)
@@ -0,0 +1,3 @@

def isNaN(num):
    return num != num
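A one-line sanity check (not part of the commit) of the self-inequality trick used here:

from sbb_newspapers_org_image.utils.is_nan import isNaN

assert isNaN(float("nan")) is True   # NaN is the only value unequal to itself
assert isNaN(1.0) is False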
sbb_newspapers_org_image/utils/marginals.py (new file, 252 lines)
@@ -0,0 +1,252 @@
import numpy as np
import cv2
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d

from .contour import find_new_features_of_contoures, return_contours_of_interested_region
from .resize import resize_image
from .rotate import rotate_image


def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None):
    mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1]))
    mask_marginals=mask_marginals.astype(np.uint8)

    text_with_lines=text_with_lines.astype(np.uint8)

    text_with_lines_eroded=cv2.erode(text_with_lines,kernel,iterations=5)

    if text_with_lines.shape[0]<=1500:
        pass
    elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800:
        text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1])
        text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5)
        text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
    else:
        text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1])
        text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7)
        text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])

    text_with_lines_y=text_with_lines.sum(axis=0)
    text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0)

    thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100

    if thickness_along_y_percent<30:
        min_textline_thickness=8
    elif thickness_along_y_percent>=30 and thickness_along_y_percent<50:
        min_textline_thickness=20
    else:
        min_textline_thickness=40

    if thickness_along_y_percent>=14:

        text_with_lines_y_rev=-1*text_with_lines_y[:]
        text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev)

        sigma_gaus=1
        region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus)
        region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus)

        region_sum_0_updown=region_sum_0[len(region_sum_0)::-1]

        first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None))
        last_nonzero=(next((i for i, x in enumerate(region_sum_0_updown) if x), None))

        last_nonzero=len(region_sum_0)-last_nonzero

        mid_point=(last_nonzero+first_nonzero)/2.

        one_third_right=(last_nonzero-mid_point)/3.0
        one_third_left=(mid_point-first_nonzero)/3.0

        peaks, _ = find_peaks(text_with_lines_y_rev, height=0)

        peaks=np.array(peaks)

        peaks=peaks[(peaks>first_nonzero) & ((peaks<last_nonzero))]

        peaks=peaks[region_sum_0[peaks]<min_textline_thickness ]

        if num_col==1:
            peaks_right=peaks[peaks>mid_point]
            peaks_left=peaks[peaks<mid_point]
        if num_col==2:
            peaks_right=peaks[peaks>(mid_point+one_third_right)]
            peaks_left=peaks[peaks<(mid_point-one_third_left)]

        try:
            point_right=np.min(peaks_right)
        except:
            point_right=last_nonzero

        try:
            point_left=np.max(peaks_left)
        except:
            point_left=first_nonzero

        if point_right>=mask_marginals.shape[1]:
            point_right=mask_marginals.shape[1]-1

        try:
            mask_marginals[:,point_left:point_right]=1
        except:
            mask_marginals[:,:]=1

        mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew)

        mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0)

        mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1
        index_x=np.array(range(len(mask_marginals_rotated_sum)))+1

        index_x_interest=index_x[mask_marginals_rotated_sum==1]

        min_point_of_left_marginal=np.min(index_x_interest)-16
        max_point_of_right_marginal=np.max(index_x_interest)+16

        if min_point_of_left_marginal<0:
            min_point_of_left_marginal=0
        if max_point_of_right_marginal>=text_regions.shape[1]:
            max_point_of_right_marginal=text_regions.shape[1]-1

        text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4

        pixel_img=4
        min_area_text=0.00001
        polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text)

        cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contoures(polygons_of_marginals)

        text_regions[(text_regions[:,:]==4)]=1

        marginlas_should_be_main_text=[]

        x_min_marginals_left=[]
        x_min_marginals_right=[]

        for i in range(len(cx_text_only)):

            x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i])
            y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i])
            if x_width_mar>16 and y_height_mar/x_width_mar<18:
                marginlas_should_be_main_text.append(polygons_of_marginals[i])
                if x_min_text_only[i]<(mid_point-one_third_left):
                    x_min_marginals_left_new=x_min_text_only[i]
                    if len(x_min_marginals_left)==0:
                        x_min_marginals_left.append(x_min_marginals_left_new)
                    else:
                        x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new)
                else:
                    x_min_marginals_right_new=x_min_text_only[i]
                    if len(x_min_marginals_right)==0:
                        x_min_marginals_right.append(x_min_marginals_right_new)
                    else:
                        x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new)

        if len(x_min_marginals_left)==0:
            x_min_marginals_left=[0]
        if len(x_min_marginals_right)==0:
            x_min_marginals_right=[text_regions.shape[1]-1]

        text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4))

        text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0
        text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0

    else:
        pass
    return text_regions
sbb_newspapers_org_image/utils/resize.py (new file, 4 lines)
@@ -0,0 +1,4 @@
import cv2

def resize_image(img_in, input_height, input_width):
    return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)
sbb_newspapers_org_image/utils/rotate.py (new file, 85 lines)
@@ -0,0 +1,85 @@
import math

import imutils
import cv2


def rotatedRectWithMaxArea(w, h, angle):
    if w <= 0 or h <= 0:
        return 0, 0

    width_is_longer = w >= h
    side_long, side_short = (w, h) if width_is_longer else (h, w)

    # since the solutions for angle, -angle and 180-angle are all the same,
    # it suffices to look at the first quadrant and the absolute values of sin, cos:
    sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
    if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10:
        # half constrained case: two crop corners touch the longer side,
        # the other two corners are on the mid-line parallel to the longer line
        x = 0.5 * side_short
        wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
    else:
        # fully constrained case: crop touches all 4 sides
        cos_2a = cos_a * cos_a - sin_a * sin_a
        wr, hr = (w * cos_a - h * sin_a) / cos_2a, (h * cos_a - w * sin_a) / cos_2a

    return wr, hr


def rotate_max_area_new(image, rotated, angle):
    wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
    h, w, _ = rotated.shape
    y1 = h // 2 - int(hr / 2)
    y2 = y1 + int(hr)
    x1 = w // 2 - int(wr / 2)
    x2 = x1 + int(wr)
    return rotated[y1:y2, x1:x2]


def rotation_image_new(img, thetha):
    rotated = imutils.rotate(img, thetha)
    return rotate_max_area_new(img, rotated, thetha)


def rotate_image(img_patch, slope):
    (h, w) = img_patch.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, slope, 1.0)
    return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)


def rotyate_image_different(img, slope):
    # img = cv2.imread('images/input.jpg')
    num_rows, num_cols = img.shape[:2]

    rotation_matrix = cv2.getRotationMatrix2D((num_cols / 2, num_rows / 2), slope, 1)
    img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows))
    return img_rotation


def rotate_max_area(image, rotated, rotated_textline, rotated_layout, angle):
    wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
    h, w, _ = rotated.shape
    y1 = h // 2 - int(hr / 2)
    y2 = y1 + int(hr)
    x1 = w // 2 - int(wr / 2)
    x2 = x1 + int(wr)
    return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2]


def rotation_not_90_func(img, textline, text_regions_p_1, thetha):
    rotated = imutils.rotate(img, thetha)
    rotated_textline = imutils.rotate(textline, thetha)
    rotated_layout = imutils.rotate(text_regions_p_1, thetha)
    return rotate_max_area(img, rotated, rotated_textline, rotated_layout, thetha)


def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha):
    rotated = imutils.rotate(img, thetha)
    rotated_textline = imutils.rotate(textline, thetha)
    rotated_layout = imutils.rotate(text_regions_p_1, thetha)
    rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha)
    return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha)


def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
    wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
    h, w, _ = rotated.shape
    y1 = h // 2 - int(hr / 2)
    y2 = y1 + int(hr)
    x1 = w // 2 - int(wr / 2)
    x2 = x1 + int(wr)
    return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_layout_full[y1:y2, x1:x2]
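For orientation (not part of the commit), a minimal sketch of the maximal-area rotation crop on a synthetic page; the page size and the 2.5 degree deskew angle are assumptions.

import math
import numpy as np
from sbb_newspapers_org_image.utils.rotate import rotatedRectWithMaxArea, rotation_image_new

img = np.full((400, 600, 3), 255, dtype=np.uint8)           # synthetic white 600x400 page
wr, hr = rotatedRectWithMaxArea(600, 400, math.radians(2.5))
print(int(wr), int(hr))                                      # largest axis-aligned rectangle that fits after a 2.5 deg rotation
deskewed = rotation_image_new(img, 2.5)                       # rotate, then crop to that maximal rectangle
print(deskewed.shape)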
sbb_newspapers_org_image/utils/separate_lines.py (new file, 1752 lines): file diff suppressed because it is too large.
setup.py (2 changes)
@@ -16,7 +16,7 @@ setup(
     install_requires=install_requires,
     entry_points={
         'console_scripts': [
-            'eynollah=sbb_newspapers_org_image.eynollah:main',
+            'eynollah=sbb_newspapers_org_image.cli:main',
             # 'ocrd-eynollah=eynollah.ocrd_cli:cli',
         ]
     },
tests/resources/kant_aufklaerung_1784_0020.tif (new binary file, not shown)
tests/test_smoke.py (new file, 7 lines)
@@ -0,0 +1,7 @@
def test_utils_import():
    import sbb_newspapers_org_image.utils
    import sbb_newspapers_org_image.utils.contour
    import sbb_newspapers_org_image.utils.drop_capitals
    import sbb_newspapers_org_image.utils.drop_capitals
    import sbb_newspapers_org_image.utils.is_nan
    import sbb_newspapers_org_image.utils.rotate