rename package to qurator.eynollah

This commit is contained in:
Konstantin Baierer 2021-02-24 18:49:05 +01:00
parent a96d23712d
commit 58c4403e13
22 changed files with 15 additions and 13 deletions

0
qurator/.gitkeep Normal file
View file

1
qurator/__init__.py Normal file
View file

@ -0,0 +1 @@
__import__("pkg_resources").declare_namespace(__name__)

View file

@ -0,0 +1 @@

139
qurator/eynollah/cli.py Normal file
View file

@ -0,0 +1,139 @@
import sys
import click
from ocrd_utils import initLogging, setOverrideLogLevel
from qurator.eynollah.eynollah import Eynollah
@click.command()
@click.option(
"--image",
"-i",
help="image filename",
type=click.Path(exists=True, dir_okay=False),
required=True,
)
@click.option(
"--out",
"-o",
help="directory to write output xml data",
type=click.Path(exists=True, file_okay=False),
required=True,
)
@click.option(
"--model",
"-m",
help="directory of models",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_images",
"-si",
help="if a directory is given, images in documents will be cropped and saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_layout",
"-sl",
help="if a directory is given, plot of layout will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_deskewed",
"-sd",
help="if a directory is given, deskewed image will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--save_all",
"-sa",
help="if a directory is given, all plots needed for documentation will be saved there",
type=click.Path(exists=True, file_okay=False),
)
@click.option(
"--enable-plotting/--disable-plotting",
"-ep/-noep",
is_flag=True,
help="If set, will plot intermediary files and images",
)
@click.option(
"--allow-enhancement/--no-allow-enhancement",
"-ae/-noae",
is_flag=True,
help="if this parameter set to true, this tool would check that input image need resizing and enhancement or not. If so output of resized and enhanced image and corresponding layout data will be written in out directory",
)
@click.option(
"--curved-line/--no-curvedline",
"-cl/-nocl",
is_flag=True,
help="if this parameter set to true, this tool will try to return contoure of textlines instead of rectabgle bounding box of textline. This should be taken into account that with this option the tool need more time to do process.",
)
@click.option(
"--full-layout/--no-full-layout",
"-fl/-nofl",
is_flag=True,
help="if this parameter set to true, this tool will try to return all elements of layout.",
)
@click.option(
"--allow_scaling/--no-allow-scaling",
"-as/-noas",
is_flag=True,
help="if this parameter set to true, this tool would check the scale and if needed it will scale it to perform better layout detection",
)
@click.option(
"--headers-off/--headers-on",
"-ho/-noho",
is_flag=True,
help="if this parameter set to true, this tool would ignore headers role in reading order",
)
@click.option(
"--log-level",
"-l",
type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
help="Override log level globally to this",
)
def main(
image,
out,
model,
save_images,
save_layout,
save_deskewed,
save_all,
enable_plotting,
allow_enhancement,
curved_line,
full_layout,
allow_scaling,
headers_off,
log_level
):
if log_level:
setOverrideLogLevel(log_level)
initLogging()
if not enable_plotting and (save_layout or save_deskewed or save_all or save_images):
print("Error: You used one of -sl, -sd, -sa or -si but did not enable plotting with -ep")
sys.exit(1)
elif enable_plotting and not (save_layout or save_deskewed or save_all or save_images):
print("Error: You used -ep to enable plotting but set none of -sl, -sd, -sa or -si")
sys.exit(1)
eynollah = Eynollah(
image,
None,
out,
model,
save_images,
save_layout,
save_deskewed,
save_all,
enable_plotting,
allow_enhancement,
curved_line,
full_layout,
allow_scaling,
headers_off,
)
pcgts = eynollah.run()
eynollah.writer.write_pagexml(pcgts)
if __name__ == "__main__":
main()

1818
qurator/eynollah/eynollah.py Normal file

File diff suppressed because it is too large Load diff

169
qurator/eynollah/plot.py Normal file
View file

@ -0,0 +1,169 @@
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import os.path
import cv2
from scipy.ndimage import gaussian_filter1d
from .utils import crop_image_inside_box
from .utils.rotate import rotyate_image_different
from .utils.resize import resize_image
class EynollahPlotter():
"""
Class collecting all the plotting and image writing methods
"""
def __init__(
self,
*,
dir_of_all,
dir_of_deskewed,
dir_of_layout,
dir_of_cropped_images,
image_filename,
image_filename_stem,
image_org=None,
scale_x=1,
scale_y=1,
):
self.dir_of_all = dir_of_all
self.dir_of_layout = dir_of_layout
self.dir_of_cropped_images = dir_of_cropped_images
self.dir_of_deskewed = dir_of_deskewed
self.image_filename = image_filename
self.image_filename_stem = image_filename_stem
# XXX TODO hacky these cannot be set at init time
self.image_org = image_org
self.scale_x = scale_x
self.scale_y = scale_y
def save_plot_of_layout_main(self, text_regions_p, image_page):
if self.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(40, 40))
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout_main.png"))
def save_plot_of_layout_main_all(self, text_regions_p, image_page):
if self.dir_of_all is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels=['Background' , 'Main text' , 'Image' , 'Separator','Marginalia']
values_indexes = [0, 1, 2, 3, 4]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
plt.subplot(1, 2, 1)
plt.imshow(image_page)
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_main_and_page.png"))
def save_plot_of_layout(self, text_regions_p, image_page):
if self.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
values_indexes = [0, 1, 2, 8, 4, 5, 6]
plt.figure(figsize=(40, 40))
plt.rcParams["font.size"] = "40"
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout.png"))
def save_plot_of_layout_all(self, text_regions_p, image_page):
if self.dir_of_all is not None:
values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
pixels = ["Background", "Main text", "Header", "Marginalia", "Drop capital", "Image", "Separator"]
values_indexes = [0, 1, 2, 8, 4, 5, 6]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
plt.subplot(1, 2, 1)
plt.imshow(image_page)
plt.subplot(1, 2, 2)
im = plt.imshow(text_regions_p[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_and_page.png"))
def save_plot_of_textlines(self, textline_mask_tot_ea, image_page):
if self.dir_of_all is not None:
values = np.unique(textline_mask_tot_ea[:, :])
pixels = ["Background", "Textlines"]
values_indexes = [0, 1]
plt.figure(figsize=(80, 40))
plt.rcParams["font.size"] = "40"
plt.subplot(1, 2, 1)
plt.imshow(image_page)
plt.subplot(1, 2, 2)
im = plt.imshow(textline_mask_tot_ea[:, :])
colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_textline_and_page.png"))
def save_deskewed_image(self, slope_deskew):
if self.dir_of_all is not None:
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_org.png"), self.image_org)
if self.dir_of_deskewed is not None:
img_rotated = rotyate_image_different(self.image_org, slope_deskew)
cv2.imwrite(os.path.join(self.dir_of_deskewed, self.image_filename_stem + "_deskewed.png"), img_rotated)
def save_page_image(self, image_page):
if self.dir_of_all is not None:
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page)
def save_plot_of_textline_density(self, img_patch_org):
if self.dir_of_all is not None:
plt.figure(figsize=(80,40))
plt.rcParams['font.size']='50'
plt.subplot(1,2,1)
plt.imshow(img_patch_org)
plt.subplot(1,2,2)
plt.plot(gaussian_filter1d(img_patch_org.sum(axis=1), 3),np.array(range(len(gaussian_filter1d(img_patch_org.sum(axis=1), 3)))),linewidth=8)
plt.xlabel('Density of textline prediction in direction of X axis',fontsize=60)
plt.ylabel('Height',fontsize=60)
plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
plt.gca().invert_yaxis()
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_density_of_textline.png'))
def save_plot_of_rotation_angle(self, angels, var_res):
if self.dir_of_all is not None:
plt.figure(figsize=(60,30))
plt.rcParams['font.size']='50'
plt.plot(angels,np.array(var_res),'-o',markersize=25,linewidth=4)
plt.xlabel('angle',fontsize=50)
plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50)
plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$')
plt.legend(loc='best')
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_rotation_angle.png'))
def write_images_into_directory(self, img_contoures, image_page):
if self.dir_of_cropped_images is not None:
index = 0
for cont_ind in img_contoures:
x, y, w, h = cv2.boundingRect(cont_ind)
box = [x, y, w, h]
croped_page, page_coord = crop_image_inside_box(box, image_page)
croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x))
path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg")
cv2.imwrite(path, croped_page)
index += 1

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,238 @@
import cv2
import numpy as np
from shapely import geometry
from .rotate import rotate_image, rotation_image_new
def contours_in_same_horizon(cy_main_hor):
X1 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
X2 = np.zeros((len(cy_main_hor), len(cy_main_hor)))
X1[0::1, :] = cy_main_hor[:]
X2 = X1.T
X_dif = np.abs(X2 - X1)
args_help = np.array(range(len(cy_main_hor)))
all_args = []
for i in range(len(cy_main_hor)):
list_h = list(args_help[X_dif[i, :] <= 20])
list_h.append(i)
if len(list_h) > 1:
all_args.append(list(set(list_h)))
return np.unique(all_args)
def find_contours_mean_y_diff(contours_main):
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
return np.mean(np.diff(np.sort(np.array(cy_main))))
def get_text_region_boxes_by_given_contours(contours):
kernel = np.ones((5, 5), np.uint8)
boxes = []
contours_new = []
for jj in range(len(contours)):
x, y, w, h = cv2.boundingRect(contours[jj])
boxes.append([x, y, w, h])
contours_new.append(contours[jj])
del contours
return boxes, contours_new
def filter_contours_area_of_image(image, contours, hirarchy, max_area, min_area):
found_polygons_early = list()
jv = 0
for c in contours:
if len(c) < 3: # A polygon cannot have less than 3 points
continue
polygon = geometry.Polygon([point[0] for point in c])
area = polygon.area
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and hirarchy[0][jv][3] == -1: # and hirarchy[0][jv][3]==-1 :
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.uint))
jv += 1
return found_polygons_early
def filter_contours_area_of_image_tables(image, contours, hirarchy, max_area, min_area):
found_polygons_early = list()
jv = 0
for c in contours:
if len(c) < 3: # A polygon cannot have less than 3 points
continue
polygon = geometry.Polygon([point[0] for point in c])
# area = cv2.contourArea(c)
area = polygon.area
##print(np.prod(thresh.shape[:2]))
# Check that polygon has area greater than minimal area
# print(hirarchy[0][jv][3],hirarchy )
if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]): # and hirarchy[0][jv][3]==-1 :
# print(c[0][0][1])
found_polygons_early.append(np.array([[point] for point in polygon.exterior.coords], dtype=np.int32))
jv += 1
return found_polygons_early
def find_new_features_of_contoures(contours_main):
areas_main = np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
M_main = [cv2.moments(contours_main[j]) for j in range(len(contours_main))]
cx_main = [(M_main[j]["m10"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
cy_main = [(M_main[j]["m01"] / (M_main[j]["m00"] + 1e-32)) for j in range(len(M_main))]
try:
x_min_main = np.array([np.min(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 0] for j in range(len(contours_main))])
y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0, 1] for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0, 0]) for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
y_max_main = np.array([np.max(contours_main[j][:, 0, 1]) for j in range(len(contours_main))])
except:
x_min_main = np.array([np.min(contours_main[j][:, 0]) for j in range(len(contours_main))])
argmin_x_main = np.array([np.argmin(contours_main[j][:, 0]) for j in range(len(contours_main))])
x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 0] for j in range(len(contours_main))])
y_corr_x_min_from_argmin = np.array([contours_main[j][argmin_x_main[j], 1] for j in range(len(contours_main))])
x_max_main = np.array([np.max(contours_main[j][:, 0]) for j in range(len(contours_main))])
y_min_main = np.array([np.min(contours_main[j][:, 1]) for j in range(len(contours_main))])
y_max_main = np.array([np.max(contours_main[j][:, 1]) for j in range(len(contours_main))])
# dis_x=np.abs(x_max_main-x_min_main)
return cx_main, cy_main, x_min_main, x_max_main, y_min_main, y_max_main, y_corr_x_min_from_argmin
def return_parent_contours(contours, hierarchy):
contours_parent = [contours[i] for i in range(len(contours)) if hierarchy[0][i][3] == -1]
return contours_parent
def return_contours_of_interested_region(region_pre_p, pixel, min_area=0.0002):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hiearchy)
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=min_area)
return contours_imgs
def get_textregion_contours_in_org_image(cnts, img, slope_first):
cnts_org = []
# print(cnts,'cnts')
for i in range(len(cnts)):
img_copy = np.zeros(img.shape)
img_copy = cv2.fillPoly(img_copy, pts=[cnts[i]], color=(1, 1, 1))
# plt.imshow(img_copy)
# plt.show()
# print(img.shape,'img')
img_copy = rotation_image_new(img_copy, -slope_first)
##print(img_copy.shape,'img_copy')
# plt.imshow(img_copy)
# plt.show()
img_copy = img_copy.astype(np.uint8)
imgray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
cont_int, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cont_int[0][:, 0, 0] = cont_int[0][:, 0, 0] + np.abs(img_copy.shape[1] - img.shape[1])
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
# print(np.shape(cont_int[0]))
cnts_org.append(cont_int[0])
# print(cnts_org,'cnts_org')
# sys.exit()
# self.y_shift = np.abs(img_copy.shape[0] - img.shape[0])
# self.x_shift = np.abs(img_copy.shape[1] - img.shape[1])
return cnts_org
def return_contours_of_interested_textline(region_pre_p, pixel):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hiearchy)
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=0.000000003)
return contours_imgs
def return_contours_of_image(image):
if len(image.shape) == 2:
image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
image = image.astype(np.uint8)
else:
image = image.astype(np.uint8)
imgray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
return contours, hierachy
def return_contours_of_interested_region_by_min_size(region_pre_p, pixel, min_size=0.00003):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hiearchy)
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=1, min_area=min_size)
return contours_imgs
def return_contours_of_interested_region_by_size(region_pre_p, pixel, min_area, max_area):
# pixels of images are identified by 5
if len(region_pre_p.shape) == 3:
cnts_images = (region_pre_p[:, :, 0] == pixel) * 1
else:
cnts_images = (region_pre_p[:, :] == pixel) * 1
cnts_images = cnts_images.astype(np.uint8)
cnts_images = np.repeat(cnts_images[:, :, np.newaxis], 3, axis=2)
imgray = cv2.cvtColor(cnts_images, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_imgs, hiearchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_imgs = return_parent_contours(contours_imgs, hiearchy)
contours_imgs = filter_contours_area_of_image_tables(thresh, contours_imgs, hiearchy, max_area=max_area, min_area=min_area)
img_ret = np.zeros((region_pre_p.shape[0], region_pre_p.shape[1], 3))
img_ret = cv2.fillPoly(img_ret, pts=contours_imgs, color=(1, 1, 1))
return img_ret[:, :, 0]

View file

@ -0,0 +1,501 @@
import numpy as np
import cv2
from .contour import (
find_new_features_of_contoures,
return_contours_of_image,
return_parent_contours,
)
def adhere_drop_capital_region_into_cprresponding_textline(
text_regions_p,
polygons_of_drop_capitals,
contours_only_text_parent,
contours_only_text_parent_h,
all_box_coord,
all_box_coord_h,
all_found_texline_polygons,
all_found_texline_polygons_h,
kernel=None,
curved_line=False,
):
# print(np.shape(all_found_texline_polygons),np.shape(all_found_texline_polygons[3]),'all_found_texline_polygonsshape')
# print(all_found_texline_polygons[3])
cx_m, cy_m, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent)
cx_h, cy_h, _, _, _, _, _ = find_new_features_of_contoures(contours_only_text_parent_h)
cx_d, cy_d, _, _, y_min_d, y_max_d, _ = find_new_features_of_contoures(polygons_of_drop_capitals)
img_con_all = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
for j_cont in range(len(contours_only_text_parent)):
img_con_all[all_box_coord[j_cont][0] : all_box_coord[j_cont][1], all_box_coord[j_cont][2] : all_box_coord[j_cont][3], 0] = (j_cont + 1) * 3
# img_con_all=cv2.fillPoly(img_con_all,pts=[contours_only_text_parent[j_cont]],color=((j_cont+1)*3,(j_cont+1)*3,(j_cont+1)*3))
# plt.imshow(img_con_all[:,:,0])
# plt.show()
# img_con_all=cv2.dilate(img_con_all, kernel, iterations=3)
# plt.imshow(img_con_all[:,:,0])
# plt.show()
# print(np.unique(img_con_all[:,:,0]))
for i_drop in range(len(polygons_of_drop_capitals)):
# print(i_drop,'i_drop')
img_con_all_copy = np.copy(img_con_all)
img_con = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
img_con = cv2.fillPoly(img_con, pts=[polygons_of_drop_capitals[i_drop]], color=(1, 1, 1))
# plt.imshow(img_con[:,:,0])
# plt.show()
##img_con=cv2.dilate(img_con, kernel, iterations=30)
# plt.imshow(img_con[:,:,0])
# plt.show()
# print(np.unique(img_con[:,:,0]))
img_con_all_copy[:, :, 0] = img_con_all_copy[:, :, 0] + img_con[:, :, 0]
img_con_all_copy[:, :, 0][img_con_all_copy[:, :, 0] == 1] = 0
kherej_ghesmat = np.unique(img_con_all_copy[:, :, 0]) / 3
res_summed_pixels = np.unique(img_con_all_copy[:, :, 0]) % 3
region_with_intersected_drop = kherej_ghesmat[res_summed_pixels == 1]
# region_with_intersected_drop=region_with_intersected_drop/3
region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8)
# print(len(region_with_intersected_drop),'region_with_intersected_drop1')
if len(region_with_intersected_drop) == 0:
img_con_all_copy = np.copy(img_con_all)
img_con = cv2.dilate(img_con, kernel, iterations=4)
img_con_all_copy[:, :, 0] = img_con_all_copy[:, :, 0] + img_con[:, :, 0]
img_con_all_copy[:, :, 0][img_con_all_copy[:, :, 0] == 1] = 0
kherej_ghesmat = np.unique(img_con_all_copy[:, :, 0]) / 3
res_summed_pixels = np.unique(img_con_all_copy[:, :, 0]) % 3
region_with_intersected_drop = kherej_ghesmat[res_summed_pixels == 1]
# region_with_intersected_drop=region_with_intersected_drop/3
region_with_intersected_drop = region_with_intersected_drop.astype(np.uint8)
# print(np.unique(img_con_all_copy[:,:,0]))
if curved_line:
if len(region_with_intersected_drop) > 1:
sum_pixels_of_intersection = []
for i in range(len(region_with_intersected_drop)):
# print((region_with_intersected_drop[i]*3+1))
sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum())
# print(sum_pixels_of_intersection)
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
# print(region_final,'region_final')
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
try:
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
# print(all_box_coord[j_cont])
# print(cx_t)
# print(cy_t)
# print(cx_d[i_drop])
# print(cy_d[i_drop])
y_lines = np.array(cy_t) # all_box_coord[int(region_final)][0]+np.array(cy_t)
# print(y_lines)
y_lines[y_lines < y_min_d[i_drop]] = 0
# print(y_lines)
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
# print(arg_min)
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
img_textlines = img_textlines.astype(np.uint8)
imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
# print(np.shape(contours_biggest))
# print(contours_biggest[:])
# contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
# contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
except:
# print('gordun1')
pass
elif len(region_with_intersected_drop) == 1:
region_final = region_with_intersected_drop[0] - 1
# areas_main=np.array([cv2.contourArea(all_found_texline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_texline_polygons[int(region_final)]))])
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
# print(all_box_coord[j_cont])
# print(cx_t)
# print(cy_t)
# print(cx_d[i_drop])
# print(cy_d[i_drop])
y_lines = np.array(cy_t) # all_box_coord[int(region_final)][0]+np.array(cy_t)
y_lines[y_lines < y_min_d[i_drop]] = 0
# print(y_lines)
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
# print(arg_min)
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
img_textlines = img_textlines.astype(np.uint8)
# plt.imshow(img_textlines)
# plt.show()
imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
# print(np.shape(contours_biggest))
# print(contours_biggest[:])
# contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
# contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
# print(np.shape(contours_biggest),'contours_biggest')
# print(np.shape(all_found_texline_polygons[int(region_final)][arg_min]))
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
# print(cx_t,'print')
try:
# print(all_found_texline_polygons[j_cont][0])
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
# print(all_box_coord[j_cont])
# print(cx_t)
# print(cy_t)
# print(cx_d[i_drop])
# print(cy_d[i_drop])
y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
y_lines[y_lines < y_min_d[i_drop]] = 0
# print(y_lines)
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
# print(arg_min)
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
cnt_nearest[:, 0, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 0] # +all_box_coord[int(region_final)][2]
cnt_nearest[:, 0, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 0, 1] # +all_box_coord[int(region_final)][0]
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
img_textlines = img_textlines.astype(np.uint8)
imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
# print(np.shape(contours_biggest))
# print(contours_biggest[:])
contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] # -all_box_coord[int(region_final)][2]
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] # -all_box_coord[int(region_final)][0]
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
# all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
except:
pass
else:
pass
##cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
###print(all_box_coord[j_cont])
###print(cx_t)
###print(cy_t)
###print(cx_d[i_drop])
###print(cy_d[i_drop])
##y_lines=all_box_coord[int(region_final)][0]+np.array(cy_t)
##y_lines[y_lines<y_min_d[i_drop]]=0
###print(y_lines)
##arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
###print(arg_min)
##cnt_nearest=np.copy(all_found_texline_polygons[int(region_final)][arg_min])
##cnt_nearest[:,0,0]=all_found_texline_polygons[int(region_final)][arg_min][:,0,0]#+all_box_coord[int(region_final)][2]
##cnt_nearest[:,0,1]=all_found_texline_polygons[int(region_final)][arg_min][:,0,1]#+all_box_coord[int(region_final)][0]
##img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
##img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
##img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
##img_textlines=img_textlines.astype(np.uint8)
##plt.imshow(img_textlines)
##plt.show()
##imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
##ret, thresh = cv2.threshold(imgray, 0, 255, 0)
##contours_combined,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
##print(len(contours_combined),'len textlines mixed')
##areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
##contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
###print(np.shape(contours_biggest))
###print(contours_biggest[:])
##contours_biggest[:,0,0]=contours_biggest[:,0,0]#-all_box_coord[int(region_final)][2]
##contours_biggest[:,0,1]=contours_biggest[:,0,1]#-all_box_coord[int(region_final)][0]
##contours_biggest=contours_biggest.reshape(np.shape(contours_biggest)[0],np.shape(contours_biggest)[2])
##all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
else:
if len(region_with_intersected_drop) > 1:
sum_pixels_of_intersection = []
for i in range(len(region_with_intersected_drop)):
# print((region_with_intersected_drop[i]*3+1))
sum_pixels_of_intersection.append(((img_con_all_copy[:, :, 0] == (region_with_intersected_drop[i] * 3 + 1)) * 1).sum())
# print(sum_pixels_of_intersection)
region_final = region_with_intersected_drop[np.argmax(sum_pixels_of_intersection)] - 1
# print(region_final,'region_final')
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
try:
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
# print(all_box_coord[j_cont])
# print(cx_t)
# print(cy_t)
# print(cx_d[i_drop])
# print(cy_d[i_drop])
y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
# print(y_lines)
y_lines[y_lines < y_min_d[i_drop]] = 0
# print(y_lines)
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
# print(arg_min)
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
img_textlines = img_textlines.astype(np.uint8)
imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
# print(np.shape(contours_biggest))
# print(contours_biggest[:])
contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2]
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]
contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
except:
# print('gordun1')
pass
elif len(region_with_intersected_drop) == 1:
region_final = region_with_intersected_drop[0] - 1
# areas_main=np.array([cv2.contourArea(all_found_texline_polygons[int(region_final)][0][j] ) for j in range(len(all_found_texline_polygons[int(region_final)]))])
# cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
# print(cx_t,'print')
try:
# print(all_found_texline_polygons[j_cont][0])
cx_t, cy_t, _, _, _, _, _ = find_new_features_of_contoures(all_found_texline_polygons[int(region_final)])
# print(all_box_coord[j_cont])
# print(cx_t)
# print(cy_t)
# print(cx_d[i_drop])
# print(cy_d[i_drop])
y_lines = all_box_coord[int(region_final)][0] + np.array(cy_t)
y_lines[y_lines < y_min_d[i_drop]] = 0
# print(y_lines)
arg_min = np.argmin(np.abs(y_lines - y_min_d[i_drop]))
# print(arg_min)
cnt_nearest = np.copy(all_found_texline_polygons[int(region_final)][arg_min])
cnt_nearest[:, 0] = all_found_texline_polygons[int(region_final)][arg_min][:, 0] + all_box_coord[int(region_final)][2]
cnt_nearest[:, 1] = all_found_texline_polygons[int(region_final)][arg_min][:, 1] + all_box_coord[int(region_final)][0]
img_textlines = np.zeros((text_regions_p.shape[0], text_regions_p.shape[1], 3))
img_textlines = cv2.fillPoly(img_textlines, pts=[cnt_nearest], color=(255, 255, 255))
img_textlines = cv2.fillPoly(img_textlines, pts=[polygons_of_drop_capitals[i_drop]], color=(255, 255, 255))
img_textlines = img_textlines.astype(np.uint8)
imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 0, 255, 0)
contours_combined, hierachy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours_combined),'len textlines mixed')
areas_cnt_text = np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
contours_biggest = contours_combined[np.argmax(areas_cnt_text)]
# print(np.shape(contours_biggest))
# print(contours_biggest[:])
contours_biggest[:, 0, 0] = contours_biggest[:, 0, 0] - all_box_coord[int(region_final)][2]
contours_biggest[:, 0, 1] = contours_biggest[:, 0, 1] - all_box_coord[int(region_final)][0]
contours_biggest = contours_biggest.reshape(np.shape(contours_biggest)[0], np.shape(contours_biggest)[2])
all_found_texline_polygons[int(region_final)][arg_min] = contours_biggest
# all_found_texline_polygons[int(region_final)][arg_min]=contours_biggest
except:
pass
else:
pass
#####for i_drop in range(len(polygons_of_drop_capitals)):
#####for j_cont in range(len(contours_only_text_parent)):
#####img_con=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
#####img_con=cv2.fillPoly(img_con,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
#####img_con=cv2.fillPoly(img_con,pts=[contours_only_text_parent[j_cont]],color=(255,255,255))
#####img_con=img_con.astype(np.uint8)
######imgray = cv2.cvtColor(img_con, cv2.COLOR_BGR2GRAY)
######ret, thresh = cv2.threshold(imgray, 0, 255, 0)
######contours_new,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
#####contours_new,hir_new=return_contours_of_image(img_con)
#####contours_new_parent=return_parent_contours( contours_new,hir_new)
######plt.imshow(img_con)
######plt.show()
#####try:
#####if len(contours_new_parent)==1:
######print(all_found_texline_polygons[j_cont][0])
#####cx_t,cy_t ,_, _, _ ,_,_= find_new_features_of_contoures(all_found_texline_polygons[j_cont])
######print(all_box_coord[j_cont])
######print(cx_t)
######print(cy_t)
######print(cx_d[i_drop])
######print(cy_d[i_drop])
#####y_lines=all_box_coord[j_cont][0]+np.array(cy_t)
######print(y_lines)
#####arg_min=np.argmin(np.abs(y_lines-y_min_d[i_drop]) )
######print(arg_min)
#####cnt_nearest=np.copy(all_found_texline_polygons[j_cont][arg_min])
#####cnt_nearest[:,0]=all_found_texline_polygons[j_cont][arg_min][:,0]+all_box_coord[j_cont][2]
#####cnt_nearest[:,1]=all_found_texline_polygons[j_cont][arg_min][:,1]+all_box_coord[j_cont][0]
#####img_textlines=np.zeros((text_regions_p.shape[0],text_regions_p.shape[1],3))
#####img_textlines=cv2.fillPoly(img_textlines,pts=[cnt_nearest],color=(255,255,255))
#####img_textlines=cv2.fillPoly(img_textlines,pts=[polygons_of_drop_capitals[i_drop] ],color=(255,255,255))
#####img_textlines=img_textlines.astype(np.uint8)
#####imgray = cv2.cvtColor(img_textlines, cv2.COLOR_BGR2GRAY)
#####ret, thresh = cv2.threshold(imgray, 0, 255, 0)
#####contours_combined,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
#####areas_cnt_text=np.array([cv2.contourArea(contours_combined[j]) for j in range(len(contours_combined))])
#####contours_biggest=contours_combined[np.argmax(areas_cnt_text)]
######print(np.shape(contours_biggest))
######print(contours_biggest[:])
#####contours_biggest[:,0,0]=contours_biggest[:,0,0]-all_box_coord[j_cont][2]
#####contours_biggest[:,0,1]=contours_biggest[:,0,1]-all_box_coord[j_cont][0]
#####all_found_texline_polygons[j_cont][arg_min]=contours_biggest
######print(contours_biggest)
######plt.imshow(img_textlines[:,:,0])
######plt.show()
#####else:
#####pass
#####except:
#####pass
return all_found_texline_polygons
def filter_small_drop_capitals_from_no_patch_layout(layout_no_patch, layout1):
drop_only = (layout_no_patch[:, :, 0] == 4) * 1
contours_drop, hir_on_drop = return_contours_of_image(drop_only)
contours_drop_parent = return_parent_contours(contours_drop, hir_on_drop)
areas_cnt_text = np.array([cv2.contourArea(contours_drop_parent[j]) for j in range(len(contours_drop_parent))])
areas_cnt_text = areas_cnt_text / float(drop_only.shape[0] * drop_only.shape[1])
contours_drop_parent = [contours_drop_parent[jz] for jz in range(len(contours_drop_parent)) if areas_cnt_text[jz] > 0.001]
areas_cnt_text = [areas_cnt_text[jz] for jz in range(len(areas_cnt_text)) if areas_cnt_text[jz] > 0.001]
contours_drop_parent_final = []
for jj in range(len(contours_drop_parent)):
x, y, w, h = cv2.boundingRect(contours_drop_parent[jj])
# boxes.append([int(x), int(y), int(w), int(h)])
iou_of_box_and_contoure = float(drop_only.shape[0] * drop_only.shape[1]) * areas_cnt_text[jj] / float(w * h) * 100
height_to_weight_ratio = h / float(w)
weigh_to_height_ratio = w / float(h)
if iou_of_box_and_contoure > 60 and weigh_to_height_ratio < 1.2 and height_to_weight_ratio < 2:
map_of_drop_contour_bb = np.zeros((layout1.shape[0], layout1.shape[1]))
map_of_drop_contour_bb[y : y + h, x : x + w] = layout1[y : y + h, x : x + w]
if (((map_of_drop_contour_bb == 1) * 1).sum() / float(((map_of_drop_contour_bb == 5) * 1).sum()) * 100) >= 15:
contours_drop_parent_final.append(contours_drop_parent[jj])
layout_no_patch[:, :, 0][layout_no_patch[:, :, 0] == 4] = 0
layout_no_patch = cv2.fillPoly(layout_no_patch, pts=contours_drop_parent_final, color=(4, 4, 4))
return layout_no_patch

View file

@ -0,0 +1,3 @@
def isNaN(num):
return num != num

View file

@ -0,0 +1,252 @@
import numpy as np
import cv2
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
from .contour import find_new_features_of_contoures, return_contours_of_interested_region
from .resize import resize_image
from .rotate import rotate_image
def get_marginals(text_with_lines, text_regions, num_col, slope_deskew, kernel=None):
mask_marginals=np.zeros((text_with_lines.shape[0],text_with_lines.shape[1]))
mask_marginals=mask_marginals.astype(np.uint8)
text_with_lines=text_with_lines.astype(np.uint8)
##text_with_lines=cv2.erode(text_with_lines,self.kernel,iterations=3)
text_with_lines_eroded=cv2.erode(text_with_lines,kernel,iterations=5)
if text_with_lines.shape[0]<=1500:
pass
elif text_with_lines.shape[0]>1500 and text_with_lines.shape[0]<=1800:
text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.5),text_with_lines.shape[1])
text_with_lines=cv2.erode(text_with_lines,kernel,iterations=5)
text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
else:
text_with_lines=resize_image(text_with_lines,int(text_with_lines.shape[0]*1.8),text_with_lines.shape[1])
text_with_lines=cv2.erode(text_with_lines,kernel,iterations=7)
text_with_lines=resize_image(text_with_lines,text_with_lines_eroded.shape[0],text_with_lines_eroded.shape[1])
text_with_lines_y=text_with_lines.sum(axis=0)
text_with_lines_y_eroded=text_with_lines_eroded.sum(axis=0)
thickness_along_y_percent=text_with_lines_y_eroded.max()/(float(text_with_lines.shape[0]))*100
#print(thickness_along_y_percent,'thickness_along_y_percent')
if thickness_along_y_percent<30:
min_textline_thickness=8
elif thickness_along_y_percent>=30 and thickness_along_y_percent<50:
min_textline_thickness=20
else:
min_textline_thickness=40
if thickness_along_y_percent>=14:
text_with_lines_y_rev=-1*text_with_lines_y[:]
#print(text_with_lines_y)
#print(text_with_lines_y_rev)
#plt.plot(text_with_lines_y)
#plt.show()
text_with_lines_y_rev=text_with_lines_y_rev-np.min(text_with_lines_y_rev)
#plt.plot(text_with_lines_y_rev)
#plt.show()
sigma_gaus=1
region_sum_0= gaussian_filter1d(text_with_lines_y, sigma_gaus)
region_sum_0_rev=gaussian_filter1d(text_with_lines_y_rev, sigma_gaus)
#plt.plot(region_sum_0_rev)
#plt.show()
region_sum_0_updown=region_sum_0[len(region_sum_0)::-1]
first_nonzero=(next((i for i, x in enumerate(region_sum_0) if x), None))
last_nonzero=(next((i for i, x in enumerate(region_sum_0_updown) if x), None))
last_nonzero=len(region_sum_0)-last_nonzero
##img_sum_0_smooth_rev=-region_sum_0
mid_point=(last_nonzero+first_nonzero)/2.
one_third_right=(last_nonzero-mid_point)/3.0
one_third_left=(mid_point-first_nonzero)/3.0
#img_sum_0_smooth_rev=img_sum_0_smooth_rev-np.min(img_sum_0_smooth_rev)
peaks, _ = find_peaks(text_with_lines_y_rev, height=0)
peaks=np.array(peaks)
#print(region_sum_0[peaks])
##plt.plot(region_sum_0)
##plt.plot(peaks,region_sum_0[peaks],'*')
##plt.show()
#print(first_nonzero,last_nonzero,peaks)
peaks=peaks[(peaks>first_nonzero) & ((peaks<last_nonzero))]
#print(first_nonzero,last_nonzero,peaks)
#print(region_sum_0[peaks]<10)
####peaks=peaks[region_sum_0[peaks]<25 ]
#print(region_sum_0[peaks])
peaks=peaks[region_sum_0[peaks]<min_textline_thickness ]
#print(peaks)
#print(first_nonzero,last_nonzero,one_third_right,one_third_left)
if num_col==1:
peaks_right=peaks[peaks>mid_point]
peaks_left=peaks[peaks<mid_point]
if num_col==2:
peaks_right=peaks[peaks>(mid_point+one_third_right)]
peaks_left=peaks[peaks<(mid_point-one_third_left)]
try:
point_right=np.min(peaks_right)
except:
point_right=last_nonzero
try:
point_left=np.max(peaks_left)
except:
point_left=first_nonzero
#print(point_left,point_right)
#print(text_regions.shape)
if point_right>=mask_marginals.shape[1]:
point_right=mask_marginals.shape[1]-1
try:
mask_marginals[:,point_left:point_right]=1
except:
mask_marginals[:,:]=1
#print(mask_marginals.shape,point_left,point_right,'nadosh')
mask_marginals_rotated=rotate_image(mask_marginals,-slope_deskew)
#print(mask_marginals_rotated.shape,'nadosh')
mask_marginals_rotated_sum=mask_marginals_rotated.sum(axis=0)
mask_marginals_rotated_sum[mask_marginals_rotated_sum!=0]=1
index_x=np.array(range(len(mask_marginals_rotated_sum)))+1
index_x_interest=index_x[mask_marginals_rotated_sum==1]
min_point_of_left_marginal=np.min(index_x_interest)-16
max_point_of_right_marginal=np.max(index_x_interest)+16
if min_point_of_left_marginal<0:
min_point_of_left_marginal=0
if max_point_of_right_marginal>=text_regions.shape[1]:
max_point_of_right_marginal=text_regions.shape[1]-1
#print(np.min(index_x_interest) ,np.max(index_x_interest),'minmaxnew')
#print(mask_marginals_rotated.shape,text_regions.shape,'mask_marginals_rotated')
#plt.imshow(mask_marginals)
#plt.show()
#plt.imshow(mask_marginals_rotated)
#plt.show()
text_regions[(mask_marginals_rotated[:,:]!=1) & (text_regions[:,:]==1)]=4
#plt.imshow(text_regions)
#plt.show()
pixel_img=4
min_area_text=0.00001
polygons_of_marginals=return_contours_of_interested_region(text_regions,pixel_img,min_area_text)
cx_text_only,cy_text_only ,x_min_text_only,x_max_text_only, y_min_text_only ,y_max_text_only,y_cor_x_min_main=find_new_features_of_contoures(polygons_of_marginals)
text_regions[(text_regions[:,:]==4)]=1
marginlas_should_be_main_text=[]
x_min_marginals_left=[]
x_min_marginals_right=[]
for i in range(len(cx_text_only)):
x_width_mar=abs(x_min_text_only[i]-x_max_text_only[i])
y_height_mar=abs(y_min_text_only[i]-y_max_text_only[i])
#print(x_width_mar,y_height_mar,y_height_mar/x_width_mar,'y_height_mar')
if x_width_mar>16 and y_height_mar/x_width_mar<18:
marginlas_should_be_main_text.append(polygons_of_marginals[i])
if x_min_text_only[i]<(mid_point-one_third_left):
x_min_marginals_left_new=x_min_text_only[i]
if len(x_min_marginals_left)==0:
x_min_marginals_left.append(x_min_marginals_left_new)
else:
x_min_marginals_left[0]=min(x_min_marginals_left[0],x_min_marginals_left_new)
else:
x_min_marginals_right_new=x_min_text_only[i]
if len(x_min_marginals_right)==0:
x_min_marginals_right.append(x_min_marginals_right_new)
else:
x_min_marginals_right[0]=min(x_min_marginals_right[0],x_min_marginals_right_new)
if len(x_min_marginals_left)==0:
x_min_marginals_left=[0]
if len(x_min_marginals_right)==0:
x_min_marginals_right=[text_regions.shape[1]-1]
#print(x_min_marginals_left[0],x_min_marginals_right[0],'margo')
#print(marginlas_should_be_main_text,'marginlas_should_be_main_text')
text_regions=cv2.fillPoly(text_regions, pts =marginlas_should_be_main_text, color=(4,4))
#print(np.unique(text_regions))
#text_regions[:,:int(x_min_marginals_left[0])][text_regions[:,:int(x_min_marginals_left[0])]==1]=0
#text_regions[:,int(x_min_marginals_right[0]):][text_regions[:,int(x_min_marginals_right[0]):]==1]=0
text_regions[:,:int(min_point_of_left_marginal)][text_regions[:,:int(min_point_of_left_marginal)]==1]=0
text_regions[:,int(max_point_of_right_marginal):][text_regions[:,int(max_point_of_right_marginal):]==1]=0
###text_regions[:,0:point_left][text_regions[:,0:point_left]==1]=4
###text_regions[:,point_right:][ text_regions[:,point_right:]==1]=4
#plt.plot(region_sum_0)
#plt.plot(peaks,region_sum_0[peaks],'*')
#plt.show()
#plt.imshow(text_regions)
#plt.show()
#sys.exit()
else:
pass
return text_regions

View file

@ -0,0 +1,24 @@
from PIL import Image
import numpy as np
from ocrd_models import OcrdExif
from cv2 import COLOR_GRAY2BGR, COLOR_RGB2BGR, cvtColor, imread
# from sbb_binarization
def cv2pil(img):
return Image.fromarray(img.astype('uint8'))
def pil2cv(img):
# from ocrd/workspace.py
color_conversion = COLOR_GRAY2BGR if img.mode in ('1', 'L') else COLOR_RGB2BGR
pil_as_np_array = np.array(img).astype('uint8') if img.mode == '1' else np.array(img)
return cvtColor(pil_as_np_array, color_conversion)
def check_dpi(image_filename):
exif = OcrdExif(Image.open(image_filename))
print(exif.to_xml())
resolution = exif.resolution
if exif.resolutionUnit == 'cm':
resolution /= 2.54
return int(resolution)

View file

@ -0,0 +1,4 @@
import cv2
def resize_image(img_in, input_height, input_width):
return cv2.resize(img_in, (input_width, input_height), interpolation=cv2.INTER_NEAREST)

View file

@ -0,0 +1,85 @@
import math
import imutils
import cv2
def rotatedRectWithMaxArea(w, h, angle):
if w <= 0 or h <= 0:
return 0, 0
width_is_longer = w >= h
side_long, side_short = (w, h) if width_is_longer else (h, w)
# since the solutions for angle, -angle and 180-angle are all the same,
# if suffices to look at the first quadrant and the absolute values of sin,cos:
sin_a, cos_a = abs(math.sin(angle)), abs(math.cos(angle))
if side_short <= 2.0 * sin_a * cos_a * side_long or abs(sin_a - cos_a) < 1e-10:
# half constrained case: two crop corners touch the longer side,
# the other two corners are on the mid-line parallel to the longer line
x = 0.5 * side_short
wr, hr = (x / sin_a, x / cos_a) if width_is_longer else (x / cos_a, x / sin_a)
else:
# fully constrained case: crop touches all 4 sides
cos_2a = cos_a * cos_a - sin_a * sin_a
wr, hr = (w * cos_a - h * sin_a) / cos_2a, (h * cos_a - w * sin_a) / cos_2a
return wr, hr
def rotate_max_area_new(image, rotated, angle):
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
h, w, _ = rotated.shape
y1 = h // 2 - int(hr / 2)
y2 = y1 + int(hr)
x1 = w // 2 - int(wr / 2)
x2 = x1 + int(wr)
return rotated[y1:y2, x1:x2]
def rotation_image_new(img, thetha):
rotated = imutils.rotate(img, thetha)
return rotate_max_area_new(img, rotated, thetha)
def rotate_image(img_patch, slope):
(h, w) = img_patch.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, slope, 1.0)
return cv2.warpAffine(img_patch, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
def rotyate_image_different( img, slope):
# img = cv2.imread('images/input.jpg')
num_rows, num_cols = img.shape[:2]
rotation_matrix = cv2.getRotationMatrix2D((num_cols / 2, num_rows / 2), slope, 1)
img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows))
return img_rotation
def rotate_max_area(image, rotated, rotated_textline, rotated_layout, angle):
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
h, w, _ = rotated.shape
y1 = h // 2 - int(hr / 2)
y2 = y1 + int(hr)
x1 = w // 2 - int(wr / 2)
x2 = x1 + int(wr)
return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2]
def rotation_not_90_func(img, textline, text_regions_p_1, thetha):
rotated = imutils.rotate(img, thetha)
rotated_textline = imutils.rotate(textline, thetha)
rotated_layout = imutils.rotate(text_regions_p_1, thetha)
return rotate_max_area(img, rotated, rotated_textline, rotated_layout, thetha)
def rotation_not_90_func_full_layout(img, textline, text_regions_p_1, text_regions_p_fully, thetha):
rotated = imutils.rotate(img, thetha)
rotated_textline = imutils.rotate(textline, thetha)
rotated_layout = imutils.rotate(text_regions_p_1, thetha)
rotated_layout_full = imutils.rotate(text_regions_p_fully, thetha)
return rotate_max_area_full_layout(img, rotated, rotated_textline, rotated_layout, rotated_layout_full, thetha)
def rotate_max_area_full_layout(image, rotated, rotated_textline, rotated_layout, rotated_layout_full, angle):
wr, hr = rotatedRectWithMaxArea(image.shape[1], image.shape[0], math.radians(angle))
h, w, _ = rotated.shape
y1 = h // 2 - int(hr / 2)
y2 = y1 + int(hr)
x1 = w // 2 - int(wr / 2)
x2 = x1 + int(wr)
return rotated[y1:y2, x1:x2], rotated_textline[y1:y2, x1:x2], rotated_layout[y1:y2, x1:x2], rotated_layout_full[y1:y2, x1:x2]

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,62 @@
from lxml import etree as ET
NAMESPACES = {}
NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
NAMESPACES['xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
NAMESPACES[None] = NAMESPACES['page']
def create_page_xml(imageFilename, height, width):
pcgts = ET.Element("PcGts", nsmap=NAMESPACES)
pcgts.set("{%s}schemaLocation" % NAMESPACES['xsi'], NAMESPACES['page'])
metadata = ET.SubElement(pcgts, "Metadata")
author = ET.SubElement(metadata, "Creator")
author.text = "SBB_QURATOR"
created = ET.SubElement(metadata, "Created")
created.text = "2019-06-17T18:15:12"
changetime = ET.SubElement(metadata, "LastChange")
changetime.text = "2019-06-17T18:15:12"
page = ET.SubElement(pcgts, "Page")
page.set("imageFilename", imageFilename)
page.set("imageHeight", str(height))
page.set("imageWidth", str(width))
page.set("type", "content")
page.set("readingDirection", "left-to-right")
page.set("textLineOrder", "top-to-bottom")
return pcgts, page
def add_textequiv(parent, text=''):
textequiv = ET.SubElement(parent, 'TextEquiv')
unireg = ET.SubElement(textequiv, 'Unicode')
unireg.text = text
def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals):
"""
XXX side-effect: extends id_of_marginalia
"""
region_order = ET.SubElement(page, 'ReadingOrder')
region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
region_order_sub.set('id', "ro357564684568544579089")
indexer_region = 0
for vj in order_of_texts:
name = "coord_text_%s" % vj
name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region))
name.set('regionRef', id_of_texts[vj])
indexer_region += 1
for vm in range(len(found_polygons_marginals)):
id_of_marginalia.append('r%s' % indexer_region)
name = "coord_text_%s" % indexer_region
name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region))
name.set('regionRef', 'r%s' % indexer_region)
indexer_region += 1
return id_of_marginalia

272
qurator/eynollah/writer.py Normal file
View file

@ -0,0 +1,272 @@
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
from pathlib import Path
import os.path
from .utils.xml import create_page_xml, add_textequiv, xml_reading_order
from ocrd_utils import getLogger
from lxml import etree as ET
import numpy as np
class EynollahXmlWriter():
def __init__(self, *, dir_out, image_filename, curved_line):
self.logger = getLogger('eynollah.writer')
self.dir_out = dir_out
self.image_filename = image_filename
self.image_filename_stem = Path(Path(image_filename).name).stem
self.curved_line = curved_line
self.scale_x = None # XXX set outside __init__
self.scale_y = None # XXX set outside __init__
self.height_org = None # XXX set outside __init__
self.width_org = None # XXX set outside __init__
def calculate_page_coords(self, cont_page):
self.logger.debug('enter calculate_page_coords')
points_page_print = ""
for _, contour in enumerate(cont_page[0]):
if len(contour) == 2:
points_page_print += str(int((contour[0]) / self.scale_x))
points_page_print += ','
points_page_print += str(int((contour[1]) / self.scale_y))
else:
points_page_print += str(int((contour[0][0]) / self.scale_x))
points_page_print += ','
points_page_print += str(int((contour[0][1] ) / self.scale_y))
points_page_print = points_page_print + ' '
return points_page_print[:-1]
def serialize_lines_in_marginal(self, marginal, all_found_texline_polygons_marginals, marginal_idx, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l):
for j in range(len(all_found_texline_polygons_marginals[marginal_idx])):
textline = ET.SubElement(marginal, 'TextLine')
textline.set('id', 'l%s' % id_indexer_l)
id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords')
add_textequiv(textline)
points_co = ''
for l in range(len(all_found_texline_polygons_marginals[marginal_idx][j])):
if not self.curved_line:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0])/self.scale_y))
if self.curved_line and np.abs(slopes_marginals[marginal_idx]) <= 45:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + page_coord[0]) / self.scale_y))
elif self.curved_line and np.abs(slopes_marginals[marginal_idx]) > 45:
if len(all_found_texline_polygons_marginals[marginal_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][0] + all_box_coord_marginals[marginal_idx][2] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons_marginals[marginal_idx][j][l][0][1] + all_box_coord_marginals[marginal_idx][0] + page_coord[0]) / self.scale_y))
if l < len(all_found_texline_polygons_marginals[marginal_idx][j]) - 1:
points_co += ' '
coord.set('points',points_co)
return id_indexer_l
def serialize_lines_in_region(self, textregion, all_found_texline_polygons, region_idx, page_coord, all_box_coord, slopes, id_indexer_l):
self.logger.debug('enter serialize_lines_in_region')
for j in range(len(all_found_texline_polygons[region_idx])):
textline = ET.SubElement(textregion, 'TextLine')
textline.set('id', 'l%s' % id_indexer_l)
id_indexer_l += 1
coord = ET.SubElement(textline, 'Coords')
add_textequiv(textline)
points_co = ''
for l in range(len(all_found_texline_polygons[region_idx][j])):
if not self.curved_line:
if len(all_found_texline_polygons[region_idx][j][l])==2:
textline_x_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0] + all_box_coord[region_idx][2] + page_coord[2]) / self.scale_x))
textline_y_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][1] + all_box_coord[region_idx][0] + page_coord[0]) / self.scale_y))
else:
textline_x_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0][0] + all_box_coord[region_idx][2] + page_coord[2]) / self.scale_x))
textline_y_coord = max(0, int((all_found_texline_polygons[region_idx][j][l][0][1] + all_box_coord[region_idx][0] + page_coord[0]) / self.scale_y))
points_co += str(textline_x_coord)
points_co += ','
points_co += str(textline_y_coord)
if self.curved_line and np.abs(slopes[region_idx]) <= 45:
if len(all_found_texline_polygons[region_idx][j][l]) == 2:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][1] + page_coord[0]) / self.scale_y))
else:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][1] + page_coord[0])/self.scale_y))
elif self.curved_line and np.abs(slopes[region_idx]) > 45:
if len(all_found_texline_polygons[region_idx][j][l])==2:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0] + all_box_coord[region_idx][2]+page_coord[2])/self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][1] + all_box_coord[region_idx][0]+page_coord[0])/self.scale_y))
else:
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][0] + all_box_coord[region_idx][2]+page_coord[2])/self.scale_x))
points_co += ','
points_co += str(int((all_found_texline_polygons[region_idx][j][l][0][1] + all_box_coord[region_idx][0]+page_coord[0])/self.scale_y))
if l < len(all_found_texline_polygons[region_idx][j]) - 1:
points_co += ' '
coord.set('points',points_co)
return id_indexer_l
def write_pagexml(self, pcgts):
self.logger.info("filename stem: '%s'", self.image_filename_stem)
tree = ET.ElementTree(pcgts)
tree.write(os.path.join(self.dir_out, self.image_filename_stem) + ".xml")
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
self.logger.debug('enter build_pagexml_no_full_layout')
# create the file structure
pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
page_print_sub = ET.SubElement(page, "Border")
coord_page = ET.SubElement(page_print_sub, "Coords")
coord_page.set('points', self.calculate_page_coords(cont_page))
id_of_marginalia = []
id_indexer = 0
id_indexer_l = 0
if len(found_polygons_text_region) > 0:
id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)):
textregion = ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
textregion.set('type', 'paragraph')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
add_textequiv(textregion)
for mm in range(len(found_polygons_marginals)):
marginal = ET.SubElement(page, 'TextRegion')
marginal.set('id', id_of_marginalia[mm])
marginal.set('type', 'marginalia')
coord_text = ET.SubElement(marginal, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
id_indexer_l = self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l)
id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
for mm in range(len(found_polygons_text_region_img)):
textregion = ET.SubElement(page, 'ImageRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
coord_text = ET.SubElement(textregion, 'Coords')
points_co = ''
for lmm in range(len(found_polygons_text_region_img[mm])):
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,0] + page_coord[2]) / self.scale_x))
points_co += ','
points_co += str(int((found_polygons_text_region_img[mm][lmm,0,1] + page_coord[0]) / self.scale_y))
if lmm < len(found_polygons_text_region_img[mm]) - 1:
points_co += ' '
coord_text.set('points', points_co)
return pcgts
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals, cont_page):
self.logger.debug('enter build_pagexml_full_layout')
# create the file structure
pcgts, page = create_page_xml(self.image_filename, self.height_org, self.width_org)
page_print_sub = ET.SubElement(page, "Border")
coord_page = ET.SubElement(page_print_sub, "Coords")
coord_page.set('points', self.calculate_page_coords(cont_page))
id_indexer = 0
id_indexer_l = 0
id_of_marginalia = []
if len(found_polygons_text_region) > 0:
id_of_marginalia = xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found_polygons_marginals)
for mm in range(len(found_polygons_text_region)):
textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
textregion.set('type', 'paragraph')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons, mm, page_coord, all_box_coord, slopes, id_indexer_l)
add_textequiv(textregion)
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
if len(found_polygons_text_region_h) > 0:
for mm in range(len(found_polygons_text_region_h)):
textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
textregion.set('type','header')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_h, mm, page_coord))
id_indexer_l = self.serialize_lines_in_region(textregion, all_found_texline_polygons_h, mm, page_coord, all_box_coord_h, slopes, id_indexer_l)
add_textequiv(textregion)
if len(found_polygons_drop_capitals) > 0:
id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals)
for mm in range(len(found_polygons_drop_capitals)):
textregion=ET.SubElement(page, 'TextRegion')
textregion.set('id',' r%s' % id_indexer)
id_indexer += 1
textregion.set('type', 'drop-capital')
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_drop_capitals, mm, page_coord))
add_textequiv(textregion)
for mm in range(len(found_polygons_marginals)):
marginal = ET.SubElement(page, 'TextRegion')
add_textequiv(textregion)
marginal.set('id', id_of_marginalia[mm])
marginal.set('type', 'marginalia')
coord_text = ET.SubElement(marginal, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_marginals, mm, page_coord))
id_indexer_l = self.serialize_lines_in_marginal(marginal, all_found_texline_polygons_marginals, mm, page_coord, all_box_coord_marginals, slopes_marginals, id_indexer_l)
id_indexer = len(found_polygons_text_region) + len(found_polygons_text_region_h) + len(found_polygons_marginals) + len(found_polygons_drop_capitals)
for mm in range(len(found_polygons_text_region_img)):
textregion=ET.SubElement(page, 'ImageRegion')
textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_text_region_img, mm, page_coord))
for mm in range(len(found_polygons_tables)):
textregion = ET.SubElement(page, 'TableRegion')
textregion.set('id', 'r%s' %id_indexer)
id_indexer += 1
coord_text = ET.SubElement(textregion, 'Coords')
coord_text.set('points', self.calculate_polygon_coords(found_polygons_tables, mm, page_coord))
return pcgts
def calculate_polygon_coords(self, contour_list, i, page_coord):
self.logger.debug('enter calculate_polygon_coords')
coords = ''
for j in range(len(contour_list[i])):
if len(contour_list[i][j]) == 2:
coords += str(int((contour_list[i][j][0] + page_coord[2]) / self.scale_x))
coords += ','
coords += str(int((contour_list[i][j][1] + page_coord[0]) / self.scale_y))
else:
coords += str(int((contour_list[i][j][0][0] + page_coord[2]) / self.scale_x))
coords += ','
coords += str(int((contour_list[i][j][0][1] + page_coord[0]) / self.scale_y))
if j < len(contour_list[i]) - 1:
coords=coords + ' '
return coords