Update main.py

pull/1/head
Rezanezhad, Vahid 5 years ago
parent f94511a1d8
commit 719824f19d

@ -416,7 +416,7 @@ class textlineerkenner:
img = self.otsu_copy(self.image) img = self.otsu_copy(self.image)
for ii in range(60): for ii in range(1):
img = cv2.GaussianBlur(img, (15, 15), 0) img = cv2.GaussianBlur(img, (15, 15), 0)
@ -487,7 +487,7 @@ class textlineerkenner:
img = self.otsu_copy(img) img = self.otsu_copy(img)
img = img.astype(np.uint8) img = img.astype(np.uint8)
###img = cv2.medianBlur(img,5) img = cv2.medianBlur(img,5)
# img = cv2.medianBlur(img,5) # img = cv2.medianBlur(img,5)
@ -665,22 +665,22 @@ class textlineerkenner:
contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
boxes = [] self.boxes = []
contours_new = [] contours_new = []
for jj in range(len(contours)): for jj in range(len(contours)):
if hirarchy[0][jj][2] == -1: if hirarchy[0][jj][2] == -1:
x, y, w, h = cv2.boundingRect(contours[jj]) x, y, w, h = cv2.boundingRect(contours[jj])
boxes.append([x, y, w, h]) self.boxes.append([x, y, w, h])
contours_new.append(contours[jj]) contours_new.append(contours[jj])
return boxes, contours_new return contours_new
def get_all_image_patches_based_on_text_regions(self, boxes, image_page): def get_all_image_patches_coordination(self, image_page):
self.all_box_coord=[] self.all_box_coord=[]
for jk in range(len(boxes)): for jk in range(len(self.boxes)):
crop_img,crop_coor=self.crop_image_inside_box(boxes[jk],image_page) _,crop_coor=self.crop_image_inside_box(self.boxes[jk],image_page)
self.all_box_coord.append(crop_coor) self.all_box_coord.append(crop_coor)
del crop_img
def textline_contours(self, img): def textline_contours(self, img):
model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir) model_textline, session_textline = self.start_new_session_and_model(self.model_textline_dir)
@ -702,7 +702,7 @@ class textlineerkenner:
width = img_width_textline width = img_width_textline
height = img_height_textline height = img_height_textline
#img = self.otsu_copy(img) img = self.otsu_copy(img)
img = img.astype(np.uint8) img = img.astype(np.uint8)
# for _ in range(4): # for _ in range(4):
img = cv2.medianBlur(img,5) img = cv2.medianBlur(img,5)
@ -915,8 +915,8 @@ class textlineerkenner:
self.all_text_region_raw.append(crop_img[:, :, 0]) self.all_text_region_raw.append(crop_img[:, :, 0])
self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1]) self.area_of_cropped.append(crop_img.shape[0] * crop_img.shape[1])
def seperate_lines(self, mada, contour_text_interest, thetha): def seperate_lines(self, img_path, contour_text_interest, thetha):
(h, w) = mada.shape[:2] (h, w) = img_path.shape[:2]
center = (w // 2, h // 2) center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, -thetha, 1.0) M = cv2.getRotationMatrix2D(center, -thetha, 1.0)
x_d = M[0, 2] x_d = M[0, 2]
@ -932,13 +932,13 @@ class textlineerkenner:
y_cont = y_cont - np.min(y_cont) y_cont = y_cont - np.min(y_cont)
x_min_cont = 0 x_min_cont = 0
x_max_cont = mada.shape[1] x_max_cont = img_path.shape[1]
y_min_cont = 0 y_min_cont = 0
y_max_cont = mada.shape[0] y_max_cont = img_path.shape[0]
xv = np.linspace(x_min_cont, x_max_cont, 1000) xv = np.linspace(x_min_cont, x_max_cont, 1000)
mada_n = mada.sum(axis=1) mada_n = img_path.sum(axis=1)
first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None)) first_nonzero = 0 # (next((i for i, x in enumerate(mada_n) if x), None))
@ -988,11 +988,11 @@ class textlineerkenner:
point_down_narrow = peaks[jj] + first_nonzero + int( point_down_narrow = peaks[jj] + first_nonzero + int(
1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2) 1.1 * dis_to_next_down) ###-int(dis_to_next_down*1./2)
if point_down >= mada.shape[0]: if point_down >= img_path.shape[0]:
point_down = mada.shape[0] - 2 point_down = img_path.shape[0] - 2
if point_down_narrow >= mada.shape[0]: if point_down_narrow >= img_path.shape[0]:
point_down_narrow = mada.shape[0] - 2 point_down_narrow = img_path.shape[0] - 2
distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
for mj in range(len(xv))] for mj in range(len(xv))]
@ -1088,8 +1088,8 @@ class textlineerkenner:
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
elif jj == 1: elif jj == 1:
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
if point_down >= mada.shape[0]: if point_down >= img_path.shape[0]:
point_down = mada.shape[0] - 2 point_down = img_path.shape[0] - 2
point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True) distances = [cv2.pointPolygonTest(contour_text_interest_copy, (xv[mj], peaks[jj] + first_nonzero), True)
@ -1148,8 +1148,8 @@ class textlineerkenner:
dis_to_next = peaks[jj] - peaks[jj - 1] dis_to_next = peaks[jj] - peaks[jj - 1]
# point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next) # point_down=peaks[jj]+first_nonzero+int(1./3*dis_to_next)
point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next) point_down = peaks[jj] + first_nonzero + int(1. / 1.9 * dis_to_next)
if point_down >= mada.shape[0]: if point_down >= img_path.shape[0]:
point_down = mada.shape[0] - 2 point_down = img_path.shape[0] - 2
# point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next) # point_up=peaks[jj]+first_nonzero-int(1./3*dis_to_next)
point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next) point_up = peaks[jj] + first_nonzero - int(1. / 1.9 * dis_to_next)
else: else:
@ -1191,6 +1191,8 @@ class textlineerkenner:
point_up_rot1=0 point_up_rot1=0
if point_up_rot2<0: if point_up_rot2<0:
point_up_rot2=0 point_up_rot2=0
textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)], textline_boxes_rot.append(np.array([[int(x_min_rot1), int(point_up_rot1)],
[int(x_max_rot2), int(point_up_rot2)], [int(x_max_rot2), int(point_up_rot2)],
@ -1202,13 +1204,27 @@ class textlineerkenner:
[int(x_max), int(point_down)], [int(x_max), int(point_down)],
[int(x_min), int(point_down)]])) [int(x_min), int(point_down)]]))
mada_new = np.zeros((mada.shape[0], mada.shape[1], 3)) mada_new = np.zeros((img_path.shape[0], img_path.shape[1], 3))
mada_new = cv2.fillPoly(mada_new, pts=textline_boxes, color=(255, 255, 255)) mada_new = cv2.fillPoly(mada_new, pts=textline_boxes, color=(255, 255, 255))
mada_new = mada_new.astype(np.uint8) mada_new = mada_new.astype(np.uint8)
return mada_new, peaks, textline_boxes_rot return mada_new, peaks, textline_boxes_rot
def ruturn_rotated_contours(self,slope,img_patch):
dst = self.rotate_image(img_patch, slope)
dst = dst.astype(np.uint8)
dst = dst[:, :, 0]
dst[dst != 0] = 1
imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0)
def textline_contours_postprocessing(self, textline_mask, img_patch, slope, contour_text_interest, box_ind): thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
return contours
def textline_contours_postprocessing(self, textline_mask, slope, contour_text_interest, box_ind):
textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255 textline_mask = np.repeat(textline_mask[:, :, np.newaxis], 3, axis=2) * 255
@ -1243,24 +1259,7 @@ class textlineerkenner:
contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_slope = contours # self.find_polygons_size_filter(contours,median_area=median_area,scaler_up=100,scaler_down=0.5) if len(contours) > 0:
if len(contours_slope) > 0:
for jv in range(len(contours_slope)):
new_poly = list(contours_slope[jv])
if jv == 0:
merged_all = new_poly
else:
merged_all = merged_all + new_poly
merge = np.array(merged_all)
img_in = np.zeros(textline_mask.shape)
img_p_in = cv2.fillPoly(img_in, pts=[merge], color=(255, 255, 255))
rect = cv2.minAreaRect(merge)
box = cv2.boxPoints(rect)
box = np.int0(box)
dst = self.rotate_image(textline_mask, slope) dst = self.rotate_image(textline_mask, slope)
dst = dst[:, :, 0] dst = dst[:, :, 0]
@ -1275,6 +1274,8 @@ class textlineerkenner:
img_contour = np.zeros((box_ind[3], box_ind[2], 3)) img_contour = np.zeros((box_ind[3], box_ind[2], 3))
img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255)) img_contour = cv2.fillPoly(img_contour, pts=[contour_text_copy], color=(255, 255, 255))
img_contour_rot = self.rotate_image(img_contour, slope) img_contour_rot = self.rotate_image(img_contour, slope)
img_contour_rot = img_contour_rot.astype(np.uint8) img_contour_rot = img_contour_rot.astype(np.uint8)
@ -1285,11 +1286,11 @@ class textlineerkenner:
len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))] len_con_text_rot = [len(contours_text_rot[ib]) for ib in range(len(contours_text_rot))]
ind_big_con = np.argmax(len_con_text_rot) ind_big_con = np.argmax(len_con_text_rot)
textline_maskt = textline_mask[:, :, 0]
textline_maskt[textline_maskt != 0] = 1
sep_img, _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope) sep_img, _, contours_rotated_clean = self.seperate_lines(dst, contours_text_rot[ind_big_con], slope)
dst = self.rotate_image(sep_img, -slope) dst = self.rotate_image(sep_img, -slope)
imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
@ -1510,43 +1511,46 @@ class textlineerkenner:
return ang_int return ang_int
def get_slopes_for_each_text_region(self, contours): def get_slopes_for_each_text_region(self, contours,textline_mask_tot):
# first lets find slope for biggest patch of text region (slope of deskewing) # first lets find slope for biggest patch of text region (slope of deskewing)
denoised=None
index_max_area = np.argmax(self.area_of_cropped) #index_max_area = np.argmax(self.area_of_cropped)
img_int_p=self.all_text_region_raw[index_max_area] #img_int_p=self.all_text_region_raw[index_max_area]
textline_con=self.return_contours_of_image(img_int_p) #textline_con=self.return_contours_of_image(img_int_p)
textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008) #textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008)
y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) #y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil)
sigma_des=int( y_diff_mean * (4./40.0) ) #sigma_des=int( y_diff_mean * (4./40.0) )
#refrence : sigma =4 for diff=40 #refrence : sigma =4 for diff=40
if sigma_des<1: #if sigma_des<1:
sigma_des=1 # sigma_des=1
img_int_p[img_int_p>0]=1 #img_int_p[img_int_p>0]=1
slope_biggest=self.return_deskew_slop(img_int_p,sigma_des) slope_biggest=0#self.return_deskew_slop(img_int_p,sigma_des)
# this was the old method. By now it seems the new one works better. By the way more tests are required. # this was the old method. By now it seems the new one works better. By the way more tests are required.
#slope_biggest = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[index_max_area], #slope_biggest = self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[index_max_area],
# denoised, contours[index_max_area]) # denoised, contours[index_max_area])
if np.abs(slope_biggest) > 20: #if np.abs(slope_biggest) > 20:
slope_biggest = 0 # slope_biggest = 0
self.slopes = [] self.slopes = []
for mv in range(len(self.all_text_region_raw)): for mv in range(len(self.boxes)):
img_int_p=self.all_text_region_raw[mv] textline_mask_tot = cv2.erode(textline_mask_tot, self.kernel, iterations=1)
crop_img, _ = self.crop_image_inside_box(self.boxes[mv],
np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
try: try:
textline_con=self.return_contours_of_image(img_int_p) textline_con=self.return_contours_of_image(crop_img)
textline_con_fil=self.filter_contours_area_of_image(img_int_p,textline_con,denoised,max_area=1,min_area=0.0008) textline_con_fil=self.filter_contours_area_of_image(crop_img,textline_con,denoised,max_area=1,min_area=0.0008)
y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil) y_diff_mean=self.find_contours_mean_y_diff(textline_con_fil)
sigma_des=int( y_diff_mean * (4./40.0) ) sigma_des=int( y_diff_mean * (4./40.0) )
@ -1554,22 +1558,22 @@ class textlineerkenner:
if sigma_des<1: if sigma_des<1:
sigma_des=1 sigma_des=1
img_int_p[img_int_p>0]=1 crop_img[crop_img>0]=1
slope_for_all=self.return_deskew_slop(img_int_p,sigma_des) slope_corresponding_textregion=self.return_deskew_slop(crop_img,sigma_des)
#old method #old method
#slope_for_all=self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv],denoised,contours[mv]) #slope_corresponding_textregion=self.textline_contours_to_get_slope_correctly(self.all_text_region_raw[mv],denoised,contours[mv])
#text_patch_processed=textline_contours_postprocessing(gada) #text_patch_processed=textline_contours_postprocessing(gada)
except: except:
slope_for_all=999 slope_corresponding_textregion=999
if np.abs(slope_for_all)>12.5 and slope_for_all!=999: if np.abs(slope_corresponding_textregion)>12.5 and slope_corresponding_textregion!=999:
slope_for_all=slope_biggest slope_corresponding_textregion=slope_biggest
elif slope_for_all==999: elif slope_corresponding_textregion==999:
slope_for_all=slope_biggest slope_corresponding_textregion=slope_biggest
self.slopes.append(slope_for_all) self.slopes.append(slope_corresponding_textregion)
def order_of_regions(self, textline_mask,contours_main): def order_of_regions(self, textline_mask,contours_main):
@ -1687,17 +1691,19 @@ class textlineerkenner:
order_of_texts order_of_texts
return order_of_texts, id_of_texts return order_of_texts, id_of_texts
def deskew_textline_patches(self, contours, boxes): def deskew_textline_patches(self, contours,textline_mask_tot):
self.all_text_region_processed = [] self.all_text_region_processed = []
self.all_found_texline_polygons = [] self.all_found_texline_polygons = []
denoised=None
for jj in range(len(self.all_text_region_raw)):
text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(self.all_text_region_raw[jj] for jj in range(len(self.boxes)):
, denoised, self.slopes[jj],
contours[jj], boxes[jj]) crop_img, _ = self.crop_image_inside_box(self.boxes[jj],
np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
text_patch_processed, cnt_clean_rot = self.textline_contours_postprocessing(crop_img[:,:,0]
, self.slopes[jj],
contours[jj], self.boxes[jj])
self.all_text_region_processed.append(text_patch_processed) self.all_text_region_processed.append(text_patch_processed)
text_patch_processed = text_patch_processed.astype(np.uint8) text_patch_processed = text_patch_processed.astype(np.uint8)
@ -1826,6 +1832,10 @@ class textlineerkenner:
coord = ET.SubElement(textline, 'Coords') coord = ET.SubElement(textline, 'Coords')
texteq=ET.SubElement(textline, 'TextEquiv')
uni=ET.SubElement(texteq, 'Unicode')
uni.text = ' '
#points = ET.SubElement(coord, 'Points') #points = ET.SubElement(coord, 'Points')
@ -1855,6 +1865,10 @@ class textlineerkenner:
#print(points_co) #print(points_co)
coord.set('points',points_co) coord.set('points',points_co)
texteqreg=ET.SubElement(textregion, 'TextEquiv')
unireg=ET.SubElement(texteqreg, 'Unicode')
unireg.text = ' '
@ -1867,35 +1881,47 @@ class textlineerkenner:
def run(self): def run(self):
#get image and sclaes, then extract the page of scanned image #get image and sclaes, then extract the page of scanned image
t1=time.time()
self.get_image_and_scales() self.get_image_and_scales()
image_page,page_coord=self.extract_page() image_page,page_coord=self.extract_page()
########## ##########
K.clear_session() K.clear_session()
gc.collect() gc.collect()
t2=time.time()
# extract text regions and corresponding contours and surrounding box # extract text regions and corresponding contours and surrounding box
text_regions=self.extract_text_regions(image_page) text_regions=self.extract_text_regions(image_page)
boxes,contours=self.get_text_region_contours_and_boxes(text_regions) contours=self.get_text_region_contours_and_boxes(text_regions)
########## ##########
K.clear_session() K.clear_session()
gc.collect() gc.collect()
t3=time.time()
if len(contours)>0: if len(contours)>0:
self.get_all_image_patches_based_on_text_regions(boxes,image_page) self.get_all_image_patches_coordination(image_page)
########## ##########
gc.collect() gc.collect()
# extracting textlines using segmentation # extracting textlines using segmentation
textline_mask_tot=self.textline_contours(image_page) textline_mask_tot=self.textline_contours(image_page)
#plt.imshow(textline_mask_tot)
#plt.show()
########## ##########
K.clear_session() K.clear_session()
gc.collect() gc.collect()
t4=time.time()
# get orders of each textregion. This method by now only works for one column documents. # get orders of each textregion. This method by now only works for one column documents.
indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours) indexes_sorted, matrix_of_orders=self.order_of_regions(textline_mask_tot,contours)
order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted ) order_of_texts, id_of_texts=self.order_and_id_of_texts(contours ,matrix_of_orders ,indexes_sorted )
@ -1903,24 +1929,31 @@ class textlineerkenner:
########## ##########
gc.collect() gc.collect()
t5=time.time()
# just get the textline result for each box of text regions # just get the textline result for each box of text regions
self.get_textlines_for_each_textregions(textline_mask_tot,boxes) #self.get_textlines_for_each_textregions(textline_mask_tot)
########## ##########
gc.collect()
# calculate the slope for deskewing for each box of text region. # calculate the slope for deskewing for each box of text region.
self.get_slopes_for_each_text_region(contours) self.get_slopes_for_each_text_region(contours,textline_mask_tot)
########## ##########
gc.collect() gc.collect()
t6=time.time()
# do deskewing for each box of text region. # do deskewing for each box of text region.
self.deskew_textline_patches(contours, boxes) self.deskew_textline_patches(contours,textline_mask_tot)
########## ##########
gc.collect() gc.collect()
t7=time.time()
else: else:
contours=[] contours=[]
order_of_texts=None order_of_texts=None
@ -1929,7 +1962,16 @@ class textlineerkenner:
# Destroy the current Keras session/graph to free memory # Destroy the current Keras session/graph to free memory
K.clear_session() K.clear_session()
print( "time total = "+"{0:.2f}".format(time.time()-t1) )
print( "time needed for page extraction = "+"{0:.2f}".format(t2-t1) )
print( "time needed for text region extraction and get contours = "+"{0:.2f}".format(t3-t2) )
print( "time needed for textlines = "+"{0:.2f}".format(t4-t3) )
print( "time needed to get order of regions = "+"{0:.2f}".format(t5-t4) )
print( "time needed to get slopes of regions (deskewing) = "+"{0:.2f}".format(t6-t5) )
print( "time needed to implement deskewing = "+"{0:.2f}".format(t7-t6) )
@click.command() @click.command()
@click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False)) @click.option('--image', '-i', help='image filename', type=click.Path(exists=True, dir_okay=False))

Loading…
Cancel
Save