From dec2739a50aeb6367010c1f4abb25090cc18c65c Mon Sep 17 00:00:00 2001 From: b-vr103 Date: Thu, 2 May 2019 18:41:16 +0200 Subject: [PATCH] rm pagexml2imag --- .project | 24 ---- .pydevproject | 8 -- build/lib/pagexml2img/__init__.py | 1 - build/lib/pagexml2img/pagexml2img.py | 160 ---------------------- dist/pagexml2img-1.0-py3.6.egg | Bin 5703 -> 0 bytes pagexml2img.egg-info/PKG-INFO | 11 -- pagexml2img.egg-info/SOURCES.txt | 7 - pagexml2img.egg-info/dependency_links.txt | 1 - pagexml2img.egg-info/top_level.txt | 1 - 9 files changed, 213 deletions(-) delete mode 100644 .project delete mode 100644 .pydevproject delete mode 100644 build/lib/pagexml2img/__init__.py delete mode 100644 build/lib/pagexml2img/pagexml2img.py delete mode 100644 dist/pagexml2img-1.0-py3.6.egg delete mode 100644 pagexml2img.egg-info/PKG-INFO delete mode 100644 pagexml2img.egg-info/SOURCES.txt delete mode 100644 pagexml2img.egg-info/dependency_links.txt delete mode 100644 pagexml2img.egg-info/top_level.txt diff --git a/.project b/.project deleted file mode 100644 index 6fbe82f..0000000 --- a/.project +++ /dev/null @@ -1,24 +0,0 @@ - - - toolbox_pagexml2img - - - - - - org.python.pydev.PyDevBuilder - - - - - - org.python.pydev.pythonNature - - - - setup.py - 1 - /home/vahid/workspace/toolbox_pagexml2img/setup.py - - - diff --git a/.pydevproject b/.pydevproject deleted file mode 100644 index aa7a29a..0000000 --- a/.pydevproject +++ /dev/null @@ -1,8 +0,0 @@ - - - - /${PROJECT_DIR_NAME} - - python interpreter - Default - diff --git a/build/lib/pagexml2img/__init__.py b/build/lib/pagexml2img/__init__.py deleted file mode 100644 index 95912a5..0000000 --- a/build/lib/pagexml2img/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from pagexml2img import * \ No newline at end of file diff --git a/build/lib/pagexml2img/pagexml2img.py b/build/lib/pagexml2img/pagexml2img.py deleted file mode 100644 index 8d521f9..0000000 --- a/build/lib/pagexml2img/pagexml2img.py +++ /dev/null @@ -1,160 +0,0 @@ -#! /usr/bin/env python3 - -__version__= '1.0' - -import argparse -import sys -import os -import numpy as np -import warnings -import xml.etree.ElementTree as ET -from tqdm import tqdm -import cv2 - -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - -__doc__=\ -""" -tool to extract 2d or 3d RGB images from page xml data. In former case output will be 1 -2D image array which each class has filled with a pixel value. In the case of 3D RGB image -each class will be defined with a RGB value and beside images a text file of classes also will be produced. -This classes.txt file is required for dhsegment tool. -""" - -class pagexml2img: - def __init__(self,dir_in, out_dir,output_type): - self.dir=dir_in - self.output_dir=out_dir - self.output_type=output_type - - def get_content_of_dir(self): - """ - Listing all ground truth page xml files. All files are needed to have xml format. - """ - - gt_all=os.listdir(self.dir) - self.gt_list=[file for file in gt_all if file.split('.')[ len(file.split('.'))-1 ]=='xml' ] - - def get_images_of_ground_truth(self): - """ - Reading the page xml files and write the ground truth images into given output directory. - """ - - if self.output_type=='3d' or self.output_type=='3D': - classes=np.array([ [0,0,0],[255,0,0],[0,255,0],[0,0,255]]) - - - - - for index in tqdm(range(len(self.gt_list))): - try: - tree1 = ET.parse(self.dir+'/'+self.gt_list[index]) - root1=tree1.getroot() - - - for jj in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Page'): - y_len=int(jj.attrib['imageHeight']) - x_len=int(jj.attrib['imageWidth']) - - co_text=[] - co_sep=[] - co_img=[] - - for nn in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}TextRegion'): - c_t_in=[] - for ll in nn.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Point'): - c_t_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) - co_text.append(np.array(c_t_in)) - - - for nn in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}ImageRegion'): - c_i_in=[] - for ll in nn.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Point'): - c_i_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) - co_img.append(np.array(c_i_in)) - - for nn in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}SeparatorRegion'): - c_s_in=[] - for ll in nn.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Point'): - c_s_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) - co_sep.append(np.array(c_s_in)) - - img = np.zeros( (y_len,x_len,3) ) - img_poly=cv2.fillPoly(img, pts =co_text, color=(255,0,0)) - img_poly=cv2.fillPoly(img, pts =co_img, color=(0,255,0)) - img_poly=cv2.fillPoly(img, pts =co_sep, color=(0,0,255)) - - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - except: - pass - np.savetxt(self.output_dir+'/../classes.txt',classes) - - if self.output_type=='2d' or self.output_type=='2D': - - for index in tqdm(range(len(self.gt_list))): - try: - - tree1 = ET.parse(self.dir+'/'+self.gt_list[index]) - root1=tree1.getroot() - - for jj in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Page'): - y_len=int(jj.attrib['imageHeight']) - x_len=int(jj.attrib['imageWidth']) - - co_text=[] - co_sep=[] - co_img=[] - - for nn in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}TextRegion'): - c_t_in=[] - for ll in nn.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Point'): - c_t_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) - co_text.append(np.array(c_t_in)) - - - for nn in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}ImageRegion'): - c_i_in=[] - for ll in nn.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Point'): - c_i_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) - co_img.append(np.array(c_i_in)) - - for nn in root1.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}SeparatorRegion'): - c_s_in=[] - for ll in nn.iter('{http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19}Point'): - c_s_in.append([ int(np.float(ll.attrib['x'])) , int(np.float(ll.attrib['y'])) ]) - co_sep.append(np.array(c_s_in)) - - img = np.zeros( (y_len,x_len) ) - img_poly=cv2.fillPoly(img, pts =co_text, color=(1,1,1)) - img_poly=cv2.fillPoly(img, pts =co_img, color=(2,2,2)) - img_poly=cv2.fillPoly(img, pts =co_sep, color=(3,3,3)) - try: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('-')[1].split('.')[0]+'.png',img_poly ) - except: - cv2.imwrite(self.output_dir+'/'+self.gt_list[index].split('.')[0]+'.png',img_poly ) - except: - pass - def run(self): - self.get_content_of_dir() - self.get_images_of_ground_truth() -def main(): - parser=argparse.ArgumentParser() - - parser.add_argument('-dir_in','--dir_in', dest='inp1', default=None, help='directory of page-xml files') - parser.add_argument('-dir_out','--dir_out', dest='inp2', default=None, help='directory where ground truth images would be written') - parser.add_argument('-type','--type', dest='inp3', default=None, help='this defines how output should be. A 2d image array or a 3d image array encoded with RGB color. Just pass 2d or 3d. The file will be saved one directory up. 2D image array is 3d but only information of one channel would be enough since all channels have the same values.') - options=parser.parse_args() - - possibles=globals() - possibles.update(locals()) - x=pagexml2img(options.inp1,options.inp2,options.inp3) - x.run() -if __name__=="__main__": - main() - - - diff --git a/dist/pagexml2img-1.0-py3.6.egg b/dist/pagexml2img-1.0-py3.6.egg deleted file mode 100644 index c8c852812e0b62c2a436be1826de30eded480a8d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5703 zcma)=2T;@9w#AX&0!Wiyq)AtL@1ge=kZvFWL&8;-&tppnSJ&$)WN}}#=^oP#6qSN>kDM)PKIHgK4I!Q zrhpn6f?E3O2Ev9q|NijcX(uuE9g3(kIiRgQd84ksGkb+}PgkJyv}lr^N*%%CT(z00 zz($To7Bi8p5yeOEBJv#ZweZb$??60UJijSk`GpL5H}8f7yMfGyu4Vv5Cag z??e2@u8Dz}u`19+$S1(Zcc_P0$&ey?rPJRt9Ym3$;wb;bGshY{MByPU`($D3=TdCj zljIMnW2<-ClOT*(6sg!97swE<$f+NFC`wcKHWX+aGF|5O<(7Lq%MSoP_1d*o#h&X1 zU%Ea5yH^tSxNrRJDHVTO1P9~g8m5>pdvbJwIk`JJxr5<$ZZ7Wky?>iSVPpNRMieX4 zl3`S&n4-O`_VIw(xjFecx&4;$Yw8-*42<3x8qwRhO1QlLUq4}>3rx`40pj%6o}OmK z`X}AL(2SV9=rMHzQ!odpQ@{f^5tj#0VLLk)cNZT!J0Tcchkt0??AD*1K?5U#1Hybp z6PFz{WBx<|VHfFQ_*hs1ELd0!S9q7$804TF#zimA+PD#Z&bRqnxQ=e=HA^i*fjnt;D#x-VE@sx&8lH^d=CmLx)Ur7E0jG<&%E z#lHI2>acFnB*7PHL0qLMZW*fQ?_&zyRyY-TPaHHa@9YS`?&_?HtuR6@_6A z7rV|`V6RfFR>LPJ)J{w76O0b^lhY;cyK9aK@U(gQ;%pQ14gz>S*rVHQ02!igI(vM{ zpeWJPR2_(5r4qE88?M}89{=%PFITHm-AaCEbpY}q@bgRm0L@p(vKtm*#jZ#ao`?ET zmDro+)E(0?*9nG-VdJw$Jq|7>-L(>J&R(_?%WafR1*$e(Wc{QUGlV!ddq_(w1ccw^ z;qGd6Iz`g5&jLmysJ_(sQYdj*a;L#8X)T37TI{69RATq=cT2?~pPR(@b7RT%pDXFl zZ9PIQP;&ZPC%jK~YMUm*SL5=xtWC#Z7U@*MXPL_n`dI04!TBPR4W~rQm|-C-InSm0 zST(IMTYxLG?!F2WtXN98`yPcmP5bU=Wr{l*OeUG+#)N+%B zI{YU%YI01G5s%YZi<&YJKz0BMFWDOS4Ig#HYgy((IG|D|*Pzd+;GAoY8o3`dq`fcA zKcP)G_$cP90~{KK$`7zd29w;REKzn&&v7Fx@o+?lL=Y-|Bg1oGD_s1fvso{aAAX{E5o#-ZBe4u>~$qlD4lg*px%hMggucgrfF>OH8LrwL<0 zJ6WJ_y#ya8kIJyeR}T`0T7OJB-aCqYNnZnKnTHK$5As*(Nw>BIok5IcWP2*M{MH@) z?sQ6ZWfd57ogER!^m5>D7d*g6>#|1_le6CP9($Yv+(c~?2n`KIbKrMFMcdFIqZU`k zEhE3yv4aY|8^d?%)=!A|n6i--B^l=1gMz_;)1@OyuLu+Ln5Y!X$1HAnIk_0>u%NnI z6U)y{UinSjs=i-KJTtLrF|)*9nH^WkB-e2b+0RM;+(Z6cZQf^Wj|%|RBw zktN+iwwMQX*3#IGG`^xq3^0#}Pbu2H&*_3mozizOv7(I5P69uM+(|^dd)QZk-hW7~ zf@9m+jq-PIc&fR!r0^s(k@G04uVlFH?(hR*I|=G8+}c8_9%mI-sMc&A50QQPY^hOZ zSbW$+T*;-aEMMu74N;sgyLXOu)U%#)3e16!yr+d0uhCl$KEmy+I?g|s&`gwbTV9Hz zofdBhU22AIZ%y7rh9ByU-S@w@6r@n4fs%?Yidj1j!(xZ7h+-=YB`g>D)P0+z<+drO z`K~l1$EcSL{buqxRJ!%twSHk^1T9^^(DQK2jn!+-8*-NWNdek)Q8I!aJe+mG?kXR2 zg%bVD=Pu9VIbd`+$?f`Y zoLT1fO5JlE>Cm$%Fcf@_n`1@sO#|&Z5wqz_(P=+tEH+f3w}y?p zY`!+Nyqm5%U?i-F-6zH^t>kS5h2xa6o#0Qn;H~hRTEW9X#DOH48#E@d!+E1E@AWLN z^P7$ZSC;2E%pC&Eg#@gQn%+ZPpjI{a4ud~6QE=WiV9c*sx< zl_dB%X4~^7R)^dR{N=zr&RF*B9bd><9=thawKfhX{H-KjMZJLDcpa&vZbn|CY}tZ* z4;kd(O1IrJU=WGK^q3M%318_oc6Km0*a7V9WM}uMkAY!uA7>ADK~V`z{{f4BUSFSW zLYu~O;^6dx@j1J3`^c26M{qce>5S>T=~U>9vm)rlvh5<)#OXcmxPHH;wHMKM{4sW( zsbpmSIGq$Ipd@|!>o8}fm?#fWk(aB!MNODT^ksueGXf#7IHIN~E^ouCl^GHg1hIu^ z1$EdeuM3jp%>PV#pKB_sp0v)EbEW^2cTz!-L|9n$nCpzl|8btnee~}my`{5Boa;{> z7B*rnKRa08@wR?MI6+P$y_lIS1*GV!wc&YS-;(go`>_o7k5Gd3i;^*&Br2#G3IK+c0&TN+nG`x~j``kDr{zwI$A<+V%YqRkS&GRlAzt-QePu zVYle1H-_Ic-QdMo;wMzk%~)OL#-_Zfzq!Y)wQYW|b&pZbg?-+?ZV{!KGC=!eG$!Kg zfy2I`>AQXHPn&S58;P|D46aJjEl6RaBS#CZ|PDp&ozI54ct6?%?ilk6qb2W z8f?G6D3gq=9Zk*_Dcxab6{ith(hsTz2+%iP#9)qh&N;bKP~}_aND#HbE}OM5)lSz zSw2D$YM8L5DpmzE<4$?4%4vAD;^*FVb$4{3j%xaj3sJyqPFz$mCL_9vNR7(yk<`SF2>1?|wo>ajl772qFB@O;4q; z5z04?w^cq#GxU=8`*u%qx^DzH%x_8nam*W}Y_YUYd|kM_v(&?hOgm1` zE@n*nX-BXC@r;iN=9kcE4xG}0npFil`lBnO()!b3CPz+E@*L;n;h>teNr8ZC=-GHH z?}WKkl6hLYlsUU_tH!k^DHJ+W9`Cje*xR}&j)1`#t|j0P>em6s*_P=TG@{n)>`Go1 zY=gGai$i^wZ{M)@2W|Mbo-x?Od}z5@M1tkga^yp!E?{8l^ZM(YTF-^#hyrx1T>FJW zRQ9o9~VFLcg**Z8#6VF z{suc}C3&aDC0TGIXxw6#@j|%zDWggCTLtHlh}+)G9jWp^?tMPq6D=r~eGA)ZAV_~b z$xe=!%4+a^R%D78d{C}@n)UVr;dP;_Am)Kuu@qQ>uksU9ieNoq1-`Lssy0QKB`QBG~rFLK5 z!AAn5GKiYNF+ffmeEib+;jQ?44z6K}w zh>$gd*P2<^@L!04MaaDPDe4b6ueXwY9`y!DAjN~(16m4lLY~!SeePFIJH08X9;VHyvrlk#mv6Le+na1^nYWlaHhQK8H7 zU%!8^ic~O}*E?IR^5;!gq%zJ;ua{%nx~Rz0ErDe&T6)1T5*)kzCcdm+eIbc(Tic@! z7RR*f2*CIk@}Cu_h%cJn{RtlV5DC;k@;ezXHBW(i^chafnmYp1efmyqZ#=Ull!L;) zq<-h_8zk-<={Jd_mZGZH>o(hfiQg>;bPJBa7$(?M9^GZGwo%QZ!VAu?P)YzRSE^L% ziP0y1ga*1TGMh=nzVLEOr}Lx4TjLOPmKl72EWL^CM+$`*)TS58XU> z2$q%BhY`nk*!#q%4E0orF672Yg`-;R6zu8XB%vo9oBhBs;$|^d+VW=E5T^P~$8!OF zd}ZVqqGi-E#X4aiw(o`w9KXr3qrg$WI@wBrd~G31(&NBBY@g}dnpw7FkhXkR-@3Ro z;aQS7zEK@HFVvmPc$*|62T07Pm$85Egv@-p+?;%?!qU1M&ZXsvmu;n&DnRnBhwpUV zTfb=WsP~lq?wPhH>7-disE)W+xr!=ydyC|QdOh88itp4f&WvmQQch7 z<^juHK-^EvHTN1eHO~K@%wg_^|FcTNf4}`F`1&WOyDGn$l>J+bg;g5<^0NF=Zg!P$ zIam6fYyGoI!&Na+*MAKBx0LHD>T1aK2Nix9^;hI|m2uh5@0jbKRT|!Vnek8Pby@!3 zGOs`K6O8=Q?fTE;0E1tK{I{U%4+Mz)H{=o!Gj?3&{7zl34l#c?uL=L={69vTtMIE6 r#UFS$X3n@|40F;y^U|g7h5nu|40Z4@*&Y@a5$2VRIc65s@27tON@(Ek diff --git a/pagexml2img.egg-info/PKG-INFO b/pagexml2img.egg-info/PKG-INFO deleted file mode 100644 index 31ddbdf..0000000 --- a/pagexml2img.egg-info/PKG-INFO +++ /dev/null @@ -1,11 +0,0 @@ -Metadata-Version: 1.0 -Name: pagexml2img -Version: 1.0 -Summary: UNKNOWN -Home-page: UNKNOWN -Author: UNKNOWN -Author-email: UNKNOWN -License: UNKNOWN -Description-Content-Type: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN diff --git a/pagexml2img.egg-info/SOURCES.txt b/pagexml2img.egg-info/SOURCES.txt deleted file mode 100644 index b24fdb8..0000000 --- a/pagexml2img.egg-info/SOURCES.txt +++ /dev/null @@ -1,7 +0,0 @@ -setup.py -pagexml2img/__init__.py -pagexml2img/pagexml2img.py -pagexml2img.egg-info/PKG-INFO -pagexml2img.egg-info/SOURCES.txt -pagexml2img.egg-info/dependency_links.txt -pagexml2img.egg-info/top_level.txt \ No newline at end of file diff --git a/pagexml2img.egg-info/dependency_links.txt b/pagexml2img.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/pagexml2img.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/pagexml2img.egg-info/top_level.txt b/pagexml2img.egg-info/top_level.txt deleted file mode 100644 index 687af0e..0000000 --- a/pagexml2img.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -pagexml2img