Update main.py

pull/1/head
Rezanezhad, Vahid 5 years ago
parent ca9f47eb20
commit 1702472401

@ -1155,6 +1155,7 @@ class textlineerkenner:
elif slope_for_all == 999: elif slope_for_all == 999:
slope_for_all = slope_biggest slope_for_all = slope_biggest
self.slopes.append(slope_for_all) self.slopes.append(slope_for_all)
def order_of_regions(self, textline_mask,contours_main): def order_of_regions(self, textline_mask,contours_main):
mada_n=textline_mask.sum(axis=1) mada_n=textline_mask.sum(axis=1)
y=mada_n[:] y=mada_n[:]
@ -1168,6 +1169,7 @@ class textlineerkenner:
sigma_gaus=8 sigma_gaus=8
z= gaussian_filter1d(y_help, sigma_gaus) z= gaussian_filter1d(y_help, sigma_gaus)
zneg_rev=-y_help+np.max(y_help) zneg_rev=-y_help+np.max(y_help)
@ -1182,6 +1184,8 @@ class textlineerkenner:
peaks_neg=peaks_neg-20-20 peaks_neg=peaks_neg-20-20
peaks=peaks-20 peaks=peaks-20
if contours_main!=None: if contours_main!=None:
areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))]) areas_main=np.array([cv2.contourArea(contours_main[j]) for j in range(len(contours_main))])
M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))] M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]
@ -1192,38 +1196,66 @@ class textlineerkenner:
y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))]) y_min_main=np.array([np.min(contours_main[j][:,0,1]) for j in range(len(contours_main))])
y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))]) y_max_main=np.array([np.max(contours_main[j][:,0,1]) for j in range(len(contours_main))])
#print(contours_main[0],np.shape(contours_main[0]),contours_main[0][:,0,0])
if contours_main!=None: if contours_main!=None:
indexer_main=np.array(range(len(contours_main))) indexer_main=np.array(range(len(contours_main)))
if contours_main!=None: if contours_main!=None:
len_main=len(contours_main) len_main=len(contours_main)
else: else:
len_main=0 len_main=0
matrix_of_orders=np.zeros((len_main,5)) matrix_of_orders=np.zeros((len_main,5))
matrix_of_orders[:,0]=np.array( range( len_main ) ) matrix_of_orders[:,0]=np.array( range( len_main ) )
matrix_of_orders[:len_main,1]=1 matrix_of_orders[:len_main,1]=1
matrix_of_orders[len_main:,1]=2 matrix_of_orders[len_main:,1]=2
matrix_of_orders[:len_main,2]=cx_main matrix_of_orders[:len_main,2]=cx_main
matrix_of_orders[:len_main,3]=cy_main matrix_of_orders[:len_main,3]=cy_main
matrix_of_orders[:len_main,4]=np.array( range( len_main ) ) matrix_of_orders[:len_main,4]=np.array( range( len_main ) )
#matrix_of_orders[len_main:,4]=np.array( range( len_head ) )
#print(matrix_of_orders)
peaks_neg_new=[]
peaks_neg_new.append(0)
for iii in range(len(peaks_neg)):
peaks_neg_new.append(peaks_neg[iii])
peaks_neg_new.append(textline_mask.shape[0])
final_indexers_sorted=[] final_indexers_sorted=[]
for i in range(len(peaks_neg)-1): for i in range(len(peaks_neg_new)-1):
top=peaks_neg[i] top=peaks_neg_new[i]
down=peaks_neg[i+1] down=peaks_neg_new[i+1]
indexes_in=matrix_of_orders[:,0][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))] indexes_in=matrix_of_orders[:,0][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
cxs_in=matrix_of_orders[:,2][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))] cxs_in=matrix_of_orders[:,2][(matrix_of_orders[:,3]>=top) & ((matrix_of_orders[:,3]<down))]
sorted_inside=np.argsort(cxs_in) sorted_inside=np.argsort(cxs_in)
ind_in_int=indexes_in[sorted_inside] ind_in_int=indexes_in[sorted_inside]
for j in range(len(ind_in_int)): for j in range(len(ind_in_int)):
final_indexers_sorted.append(int(ind_in_int[j]) ) final_indexers_sorted.append(int(ind_in_int[j]) )
return final_indexers_sorted, matrix_of_orders return final_indexers_sorted, matrix_of_orders
@ -1275,7 +1307,7 @@ class textlineerkenner:
# img_v=np.zeros(text_patch_processed.shape) # img_v=np.zeros(text_patch_processed.shape)
# img_v=cv2.fillPoly(img_v, pts =found_polygons, color=(255,255,255)) # img_v=cv2.fillPoly(img_v, pts =found_polygons, color=(255,255,255))
# sumi=np.sum(np.sum(self.all_text_images[jj],axis=2),axis=1) # sumi=np.sum(np.sum(self.all_text_images[jj],axis=2),axis=1)
"""
def write_into_page_xml(self,contours,page_coord,dir_of_image,order_of_texts , id_of_texts): def write_into_page_xml(self,contours,page_coord,dir_of_image,order_of_texts , id_of_texts):
found_polygons_text_region=contours found_polygons_text_region=contours
@ -1315,7 +1347,7 @@ class textlineerkenner:
page.set('textLineOrder',"top-to-bottom" ) page.set('textLineOrder',"top-to-bottom" )
"""
page_print_sub=ET.SubElement(page, 'PrintSpace') page_print_sub=ET.SubElement(page, 'PrintSpace')
coord_page = ET.SubElement(page_print_sub, 'Coords') coord_page = ET.SubElement(page_print_sub, 'Coords')
points_page_print='' points_page_print=''
@ -1334,7 +1366,7 @@ class textlineerkenner:
points_page_print=points_page_print+' ' points_page_print=points_page_print+' '
#print(points_co) #print(points_co)
coord_page.set('points',points_page_print) coord_page.set('points',points_page_print)
"""
@ -1440,7 +1472,7 @@ class textlineerkenner:
tree = ET.ElementTree(data) tree = ET.ElementTree(data)
tree.write(os.path.join(self.dir_out, self.f_name) + ".xml") tree.write(os.path.join(self.dir_out, self.f_name) + ".xml")
""" """
def write_into_page_xml(self, contours, page_coord): def write_into_page_xml(self, contours, page_coord):
found_polygons_text_region = contours found_polygons_text_region = contours
@ -1551,6 +1583,7 @@ class textlineerkenner:
tree = ET.ElementTree(data) tree = ET.ElementTree(data)
tree.write(os.path.join(self.dir_out, self.f_name) + ".xml") tree.write(os.path.join(self.dir_out, self.f_name) + ".xml")
"""
def run(self): def run(self):
self.get_image_and_scales() self.get_image_and_scales()
@ -1560,13 +1593,13 @@ class textlineerkenner:
self.get_all_image_patches_based_on_text_regions(boxes,image_page) self.get_all_image_patches_based_on_text_regions(boxes,image_page)
textline_mask_tot=self.textline_contours(image_page) textline_mask_tot=self.textline_contours(image_page)
#indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours)
#order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted )
self.get_textlines_for_each_textregions(textline_mask_tot,boxes) self.get_textlines_for_each_textregions(textline_mask_tot,boxes)
self.get_slopes_for_each_text_region(contours) self.get_slopes_for_each_text_region(contours)
self.deskew_textline_patches(contours, boxes) self.deskew_textline_patches(contours, boxes)
self.write_into_page_xml(contours,page_coord,self.dir_out ) self.write_into_page_xml(contours,page_coord,self.dir_out , order_of_texts , id_of_texts)
@click.command() @click.command()

Loading…
Cancel
Save