import click
import sys
import os
import numpy as np
import warnings
import xml . etree . ElementTree as ET
from tqdm import tqdm
import cv2
from shapely import geometry
import json
# NOTE: ``catch_warnings`` restores the previous warning filters when the
# ``with`` block exits, so the "ignore" filter installed here is only active
# inside this (otherwise empty) block.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

__doc__ = \
    """
    tool to extract 2d or 3d RGB images from page xml data. In former case output will be 1
    2D image array which each class has filled with a pixel value. In the case of 3D RGB image
    each class will be defined with a RGB value and beside images a text file of classes also will be produced.
    This classes.txt file is required for dhsegment tool.
    """

# 5x5 all-ones uint8 array, passed as the structuring element to
# cv2.erode / cv2.dilate when the artificial boundary class is generated.
KERNEL = np.ones((5, 5), np.uint8)
class pagexml2label:
    """Convert PAGE-XML ground truth files into label images.

    Every file of ``dir_in`` whose extension is ``xml`` is parsed, the
    polygons of the elements selected by the configuration are rasterised
    with OpenCV, and one PNG per input file is written into ``out_dir``.

    Parameters
    ----------
    dir_in : str
        Directory containing the PAGE-XML files.
    out_dir : str
        Directory the PNG label images are written to.
    output_type : str
        ``'2d'``: every channel holds the integer class label.
        ``'3d'``: every class is drawn with a colour tuple.
    config : str or None
        Path of the JSON configuration. It is only tested for truthiness
        here; the parsed content is passed to :meth:`run`.
    """

    # ``use_case`` value of the config -> local name of the PAGE element drawn.
    _USE_CASE_TAGS = {'textline': 'TextLine', 'word': 'Word', 'glyph': 'Glyph'}

    # TextRegion ``type`` values that can be labelled, listed in drawing
    # order (later entries are painted over earlier ones).
    _TEXT_TYPES = ('paragraph', 'footnote', 'footnote-continued', 'heading',
                   'header', 'catch-word', 'signature-mark', 'page-number',
                   'marginalia', 'drop-capital')

    # GraphicRegion ``type`` values that can be labelled, in drawing order.
    _GRAPHIC_TYPES = ('handwritten-annotation', 'signature', 'decoration',
                      'stamp')

    # (config key, PAGE element name) of regions drawn without a type filter,
    # in drawing order.
    _PLAIN_REGIONS = (('imageregion', 'ImageRegion'),
                      ('separatorregion', 'SeparatorRegion'),
                      ('tableregion', 'TableRegion'),
                      ('noiseregion', 'NoiseRegion'))

    # Erosion iterations applied per text type before the artificial boundary
    # is computed; the dilation is the same for every type.
    _BOUNDARY_EROSION = {'paragraph': 2, 'drop-capital': 0, 'catch-word': 0,
                         'page-number': 0, 'header': 1, 'heading': 1,
                         'signature-mark': 1, 'marginalia': 2, 'footnote': 2,
                         'footnote-continued': 2}
    _BOUNDARY_DILATION = 4

    # Colour used in '3d' mode for class label ``i`` is ``_LABELS_RGB[i]``.
    _LABELS_RGB = ((0, 0, 0), (255, 0, 0), (255, 125, 0), (255, 0, 125),
                   (125, 255, 125), (125, 125, 0), (0, 125, 255), (0, 125, 0),
                   (125, 125, 125), (255, 0, 255), (125, 0, 125), (0, 255, 0),
                   (0, 0, 255), (0, 255, 255), (255, 125, 125), (0, 125, 125),
                   (0, 255, 125), (255, 125, 255), (125, 255, 0))
    _ARTIFICIAL_RGB = (255, 255, 0)
    _USE_CASE_RGB = (255, 0, 0)

    def __init__(self, dir_in, out_dir, output_type, config):
        self.dir = dir_in
        self.output_dir = out_dir
        self.output_type = output_type
        self.config = config

    def get_content_of_dir(self):
        """
        Listing all ground truth page xml files. All files are needed to have xml format.

        The names are stored, sorted, in ``self.gt_list``.
        """
        self.gt_list = sorted(name for name in os.listdir(self.dir)
                              if name.rsplit('.', 1)[-1] == 'xml')

    def return_parent_contours(self, contours, hierarchy):
        """Return the contours whose parent index in ``hierarchy`` is -1.

        ``hierarchy`` is indexed as ``hierarchy[0][i][3]``, i.e. the layout
        returned by ``cv2.findContours``.
        """
        return [contour for i, contour in enumerate(contours)
                if hierarchy[0][i][3] == -1]

    def filter_contours_area_of_image_tables(self, image, contours, hierarchy, max_area, min_area):
        """Keep the contours whose polygon area lies within the given bounds.

        ``min_area`` and ``max_area`` are fractions of the area of ``image``
        (product of its first two dimensions). ``hierarchy`` is accepted for
        interface compatibility and is not used.

        Returns a list of ``int32`` arrays of shape ``(n, 1, 2)`` built from
        the exterior ring of each accepted polygon.
        """
        image_area = np.prod(image.shape[:2])
        lower = min_area * image_area
        upper = max_area * image_area
        kept = []
        for contour in contours:
            # A polygon cannot have less than 3 points
            if len(contour) < 3:
                continue
            polygon = geometry.Polygon([point[0] for point in contour])
            if lower <= polygon.area <= upper:
                kept.append(np.array([[point] for point in polygon.exterior.coords],
                                     dtype=np.int32))
        return kept

    def return_contours_of_interested_region(self, region_pre_p, pixel, min_area=0.0002):
        """Return the outer contours of the pixels equal to ``pixel``.

        For a 3-dimensional ``region_pre_p`` only channel 0 is inspected.
        Contours smaller than ``min_area`` (fraction of the image area) are
        dropped.
        """
        plane = region_pre_p[:, :, 0] if len(region_pre_p.shape) == 3 else region_pre_p
        thresh = np.where(plane == pixel, 255, 0).astype(np.uint8)
        # [-2:] copes with OpenCV 3 (three return values) and OpenCV 4 (two).
        contours, hierarchy = cv2.findContours(
            thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
        parents = self.return_parent_contours(contours, hierarchy)
        return self.filter_contours_area_of_image_tables(
            thresh, parents, hierarchy, max_area=1, min_area=min_area)

    def update_region_contours(self, co_text, img_boundary, erosion_rate, dilation_rate, y_len, x_len):
        """Shrink each contour and mark the ring around it in ``img_boundary``.

        Every contour is filled on an empty ``(y_len, x_len)`` mask, eroded
        ``erosion_rate`` times (skipped for 0) and re-traced. The pixels
        gained by dilating the eroded mask ``dilation_rate`` times are set to
        1 in ``img_boundary`` (modified in place).

        Returns ``(contours, img_boundary)``. A contour that cannot be
        re-traced after erosion is returned unchanged.
        """
        shrunk = []
        for contour in co_text:
            mask = np.zeros((y_len, x_len))
            mask = cv2.fillPoly(mask, pts=[contour], color=(1, 1, 1))
            if erosion_rate > 0:
                mask = cv2.erode(mask, KERNEL, iterations=erosion_rate)
            retraced = self.return_contours_of_interested_region(mask, 1, 0)
            shrunk.append(retraced[0] if len(retraced) > 0 else contour)
            dilated = cv2.dilate(mask, KERNEL, iterations=dilation_rate)
            img_boundary[(dilated - mask) == 1] = 1
        return shrunk, img_boundary

    @staticmethod
    def _page_size(root, link):
        """Return ``(imageHeight, imageWidth)`` of the last Page element, or None."""
        size = None
        for page in root.iter(link + 'Page'):
            size = (int(page.attrib['imageHeight']), int(page.attrib['imageWidth']))
        return size

    @staticmethod
    def _region_polygon(region, link):
        """Return the outline of ``region`` as an ``int32`` array, or None.

        Two encodings are handled: a ``Coords`` element carrying a ``points``
        attribute (result shape ``(1, n, 2)``) and a run of ``Point``
        elements with ``x``/``y`` attributes (result shape ``(n, 2)``). Only
        the first outline met while walking ``region`` is used.
        """
        points = []
        for elem in region.iter():
            if elem.tag == link + 'Coords' and elem.attrib:
                pairs = elem.attrib['points'].split()
                return np.array(
                    [[[int(pair.split(',')[0]), int(pair.split(',')[1])] for pair in pairs]],
                    dtype=np.int32)
            if elem.tag == link + 'Point':
                # x / y may be written as decimals, hence int(float(...)).
                points.append([int(float(elem.attrib['x'])),
                               int(float(elem.attrib['y']))])
            elif points:
                # First non-Point element after the run of points: done.
                break
        return np.array(points, dtype=np.int32) if points else None

    @staticmethod
    def _matching_elements(root, all_tags, ending):
        """Yield every element of ``root`` whose tag ends with ``ending``."""
        for tag in sorted({tag for tag in all_tags if tag.endswith(ending)}):
            for elem in root.iter(tag):
                yield elem

    def _collect_untyped(self, root, all_tags, ending, link):
        """Return the outlines of all elements whose tag ends with ``ending``."""
        outlines = []
        for elem in self._matching_elements(root, all_tags, ending):
            outline = self._region_polygon(elem, link)
            if outline is not None:
                outlines.append(outline)
        return outlines

    def _collect_typed(self, root, all_tags, ending, link, wanted_types):
        """Group outlines by the ``type`` attribute of the matching elements.

        Only elements whose ``type`` is listed in ``wanted_types`` are kept.
        Returns ``{type: [outline, ...]}`` with one key per wanted type.
        """
        grouped = {wanted: [] for wanted in wanted_types}
        for elem in self._matching_elements(root, all_tags, ending):
            bucket = grouped.get(elem.attrib.get('type'))
            if bucket is None:
                continue
            outline = self._region_polygon(elem, link)
            if outline is not None:
                bucket.append(outline)
        return grouped

    def _class_color(self, label):
        """Return the fill colour of class ``label`` for the current output type."""
        if self.output_type == '3d':
            return self._LABELS_RGB[label]
        return (label, label, label)

    @staticmethod
    def _fill(img, outlines, color):
        """Fill ``outlines`` on ``img`` in place; an empty list is a no-op."""
        if outlines:
            cv2.fillPoly(img, pts=outlines, color=color)

    def _paint_boundary(self, img, boundary, label):
        """Overwrite the pixels of ``img`` where ``boundary`` equals 1.

        '3d' output uses the fixed artificial colour, '2d' output writes
        ``label`` into every channel.
        """
        img[boundary == 1] = self._ARTIFICIAL_RGB if self.output_type == '3d' else label

    def _output_path(self, xml_name):
        """Return the PNG path for ``xml_name``.

        When the name contains ``-`` the part between the first and second
        ``-`` is used, otherwise the whole name; in both cases everything
        from the first ``.`` on is replaced by ``.png``.
        """
        pieces = xml_name.split('-')
        base = pieces[1] if len(pieces) > 1 else xml_name
        return os.path.join(self.output_dir, base.split('.')[0] + '.png')

    def _render_use_case(self, root, all_tags, link, config_params, y_len, x_len):
        """Draw all text lines, words or glyphs of one page as a single class."""
        ending = '}' + self._USE_CASE_TAGS[config_params['use_case']]
        outlines = self._collect_untyped(root, all_tags, ending, link)

        with_boundary = 'artificial_class_label' in config_params
        if with_boundary:
            boundary = np.zeros((y_len, x_len))
            outlines, boundary = self.update_region_contours(
                outlines, boundary, 1, 3, y_len, x_len)

        img = np.zeros((y_len, x_len, 3))
        self._fill(img, outlines,
                   self._USE_CASE_RGB if self.output_type == '3d' else (1, 1, 1))
        if with_boundary:
            self._paint_boundary(img, boundary, config_params['artificial_class_label'])
        return img

    def _render_layout(self, root, all_tags, link, config_params, y_len, x_len):
        """Draw the configured region classes of one page."""
        text_labels = config_params.get('textregions', {})
        graphic_labels = config_params.get('graphicregions', {})
        text_types = [name for name in self._TEXT_TYPES if name in text_labels]
        graphic_types = [name for name in self._GRAPHIC_TYPES if name in graphic_labels]

        text = self._collect_typed(root, all_tags, '}TextRegion', link, text_types)
        graphic = self._collect_typed(root, all_tags, '}GraphicRegion', link, graphic_types)
        plain = {key: self._collect_untyped(root, all_tags, '}' + element, link)
                 for key, element in self._PLAIN_REGIONS if key in config_params}

        with_boundary = 'artificial_class_on_boundry' in config_params
        if with_boundary:
            bordered = list(config_params['artificial_class_on_boundry'])
            artificial_label = config_params['artificial_class_label']
            boundary = np.zeros((y_len, x_len))
            for name, erosion in self._BOUNDARY_EROSION.items():
                if name in bordered and text.get(name):
                    text[name], boundary = self.update_region_contours(
                        text[name], boundary, erosion, self._BOUNDARY_DILATION,
                        y_len, x_len)

        img = np.zeros((y_len, x_len, 3))
        for name in graphic_types:
            self._fill(img, graphic[name], self._class_color(graphic_labels[name]))
        for key, _ in self._PLAIN_REGIONS:
            if key in plain:
                self._fill(img, plain[key], self._class_color(config_params[key]))
        for name in text_types:
            self._fill(img, text[name], self._class_color(text_labels[name]))
        if with_boundary:
            self._paint_boundary(img, boundary, artificial_label)
        return img

    def get_images_of_ground_truth(self, config_params):
        """
        Reading the page xml files and write the ground truth images into given output directory.

        Nothing is done when no config was given or when its ``use_case`` is
        not one of ``textline``, ``word``, ``glyph`` or ``layout``.

        Raises
        ------
        ValueError
            If ``output_type`` is neither ``'2d'`` nor ``'3d'``, or if a file
            has no ``Page`` element to take the image size from.
        """
        ## to do: add footnote to text regions
        if not self.config:
            return
        use_case = config_params['use_case']
        if use_case in self._USE_CASE_TAGS:
            render = self._render_use_case
        elif use_case == 'layout':
            render = self._render_layout
        else:
            return
        if self.output_type not in ('2d', '3d'):
            raise ValueError(
                "output type must be '2d' or '3d', got {!r}".format(self.output_type))

        for xml_name in tqdm(self.gt_list):
            root = ET.parse(os.path.join(self.dir, xml_name)).getroot()
            all_tags = [elem.tag for elem in root.iter()]
            # Namespace prefix, taken from the root tag ('{namespace}PcGts').
            link = all_tags[0].split('}')[0] + '}'
            size = self._page_size(root, link)
            if size is None:
                raise ValueError("no Page element found in {}".format(xml_name))
            y_len, x_len = size
            img = render(root, all_tags, link, config_params, y_len, x_len)
            cv2.imwrite(self._output_path(xml_name), img)

    def run(self, config_params):
        """List the input files, then render all of them."""
        self.get_content_of_dir()
        self.get_images_of_ground_truth(config_params)
@click.command()
@click.option(
    "--dir_xml",
    "-dx",
    help="directory of GT page-xml files",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--dir_out",
    "-do",
    help="directory where ground truth images would be written",
    type=click.Path(exists=True, file_okay=False),
)
@click.option(
    "--config",
    "-cfg",
    help="config file of prefered layout or use case.",
    type=click.Path(exists=True, dir_okay=False),
)
@click.option(
    "--type_output",
    "-to",
    help="this defines how output should be. A 2d image array or a 3d image array encoded with RGB color. Just pass 2d or 3d. The file will be saved one directory up. 2D image array is 3d but only information of one channel would be enough since all channels have the same values.",
)
def main(dir_xml, dir_out, type_output, config):
    """Command line entry point: load the JSON config and run the conversion.

    When ``--config`` is omitted, "passed" is printed and the converter is
    run with ``config_params`` set to ``None``.
    """
    if config:
        with open(config) as f:
            config_params = json.load(f)
    else:
        print("passed")
        config_params = None
    x = pagexml2label(dir_xml, dir_out, type_output, config)
    x.run(config_params)
# Run the click command only when the file is executed as a script.
if __name__ == "__main__":
    main()