self.kernel -> constant KERNEL

pull/19/head
Konstantin Baierer 4 years ago
parent 254dd35666
commit ddeb6938e5

@ -91,6 +91,7 @@ SLOPE_THRESHOLD = 0.13
RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45: RATIO_OF_TWO_MODEL_THRESHOLD = 95.50 #98.45:
DPI_THRESHOLD = 298 DPI_THRESHOLD = 298
MAX_SLOPE = 999 MAX_SLOPE = 999
KERNEL = np.ones((5, 5), np.uint8)
class eynollah: class eynollah:
def __init__( def __init__(
@ -131,7 +132,6 @@ class eynollah:
) )
self.logger = getLogger('eynollah') self.logger = getLogger('eynollah')
self.dir_models = dir_models self.dir_models = dir_models
self.kernel = np.ones((5, 5), np.uint8)
self.model_dir_of_enhancemnet = dir_models + "/model_enhancement.h5" self.model_dir_of_enhancemnet = dir_models + "/model_enhancement.h5"
self.model_dir_of_col_classifier = dir_models + "/model_scale_classifier.h5" self.model_dir_of_col_classifier = dir_models + "/model_scale_classifier.h5"
@ -554,14 +554,13 @@ class eynollah:
self.logger.debug("enter early_page_for_num_of_column_classification") self.logger.debug("enter early_page_for_num_of_column_classification")
img = self.imread() img = self.imread()
model_page, session_page = self.start_new_session_and_model(self.model_page_dir) model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
for ii in range(1):
img = cv2.GaussianBlur(img, (5, 5), 0) img = cv2.GaussianBlur(img, (5, 5), 0)
img_page_prediction = self.do_prediction(False, img, model_page) img_page_prediction = self.do_prediction(False, img, model_page)
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0) _, thresh = cv2.threshold(imgray, 0, 255, 0)
thresh = cv2.dilate(thresh, self.kernel, iterations=3) thresh = cv2.dilate(thresh, KERNEL, iterations=3)
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
cnt = contours[np.argmax(cnt_size)] cnt = contours[np.argmax(cnt_size)]
@ -576,7 +575,6 @@ class eynollah:
def extract_page(self): def extract_page(self):
self.logger.debug("enter extract_page") self.logger.debug("enter extract_page")
model_page, session_page = self.start_new_session_and_model(self.model_page_dir) model_page, session_page = self.start_new_session_and_model(self.model_page_dir)
for ii in range(1):
img = cv2.GaussianBlur(self.image, (5, 5), 0) img = cv2.GaussianBlur(self.image, (5, 5), 0)
img_page_prediction = self.do_prediction(False, img, model_page) img_page_prediction = self.do_prediction(False, img, model_page)
@ -584,7 +582,7 @@ class eynollah:
imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY) imgray = cv2.cvtColor(img_page_prediction, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(imgray, 0, 255, 0) _, thresh = cv2.threshold(imgray, 0, 255, 0)
thresh = cv2.dilate(thresh, self.kernel, iterations=3) thresh = cv2.dilate(thresh, KERNEL, iterations=3)
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))]) cnt_size = np.array([cv2.contourArea(contours[j]) for j in range(len(contours))])
@ -830,7 +828,7 @@ class eynollah:
all_text_region_raw = all_text_region_raw.astype(np.uint8) all_text_region_raw = all_text_region_raw.astype(np.uint8)
img_int_p = all_text_region_raw[:, :] img_int_p = all_text_region_raw[:, :]
# img_int_p=cv2.erode(img_int_p,self.kernel,iterations = 2) # img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2)
# plt.imshow(img_int_p) # plt.imshow(img_int_p)
# plt.show() # plt.show()
@ -897,9 +895,9 @@ class eynollah:
mask_biggest2 = np.zeros(mask_texts_only.shape) mask_biggest2 = np.zeros(mask_texts_only.shape)
mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1)) mask_biggest2 = cv2.fillPoly(mask_biggest2, pts=[cnt_textlines_in_image[jjjj]], color=(1, 1, 1))
if num_col + 1 == 1: if num_col + 1 == 1:
mask_biggest2 = cv2.dilate(mask_biggest2, self.kernel, iterations=5) mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=5)
else: else:
mask_biggest2 = cv2.dilate(mask_biggest2, self.kernel, iterations=4) mask_biggest2 = cv2.dilate(mask_biggest2, KERNEL, iterations=4)
pixel_img = 1 pixel_img = 1
mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par)) mask_biggest2 = resize_image(mask_biggest2, int(mask_biggest2.shape[0] * scale_par), int(mask_biggest2.shape[1] * scale_par))
@ -941,7 +939,7 @@ class eynollah:
all_text_region_raw=(textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ] all_text_region_raw=(textline_mask_tot_ea*mask_textline[:,:])[boxes_text[mv][1]:boxes_text[mv][1]+boxes_text[mv][3] , boxes_text[mv][0]:boxes_text[mv][0]+boxes_text[mv][2] ]
all_text_region_raw=all_text_region_raw.astype(np.uint8) all_text_region_raw=all_text_region_raw.astype(np.uint8)
img_int_p=all_text_region_raw[:,:]#self.all_text_region_raw[mv] img_int_p=all_text_region_raw[:,:]#self.all_text_region_raw[mv]
img_int_p=cv2.erode(img_int_p,self.kernel,iterations = 2) img_int_p=cv2.erode(img_int_p,KERNEL,iterations = 2)
if img_int_p.shape[0]/img_int_p.shape[1]<0.1: if img_int_p.shape[0]/img_int_p.shape[1]<0.1:
slopes_per_each_subprocess.append(0) slopes_per_each_subprocess.append(0)
@ -1025,11 +1023,9 @@ class eynollah:
boxes_sub_new = [] boxes_sub_new = []
poly_sub = [] poly_sub = []
for mv in range(len(boxes_per_process)): for mv in range(len(boxes_per_process)):
crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2)) crop_img, _ = crop_image_inside_box(boxes_per_process[mv], np.repeat(textline_mask_tot[:, :, np.newaxis], 3, axis=2))
crop_img = crop_img[:, :, 0] crop_img = crop_img[:, :, 0]
crop_img = cv2.erode(crop_img, self.kernel, iterations=2) crop_img = cv2.erode(crop_img, KERNEL, iterations=2)
try: try:
textline_con, hierachy = return_contours_of_image(crop_img) textline_con, hierachy = return_contours_of_image(crop_img)
textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierachy, max_area=1, min_area=0.0008) textline_con_fil = filter_contours_area_of_image(crop_img, textline_con, hierachy, max_area=1, min_area=0.0008)
@ -1194,7 +1190,7 @@ class eynollah:
tree = ET.ElementTree(pcgts) tree = ET.ElementTree(pcgts)
tree.write(os.path.join(self.dir_out, self.image_filename_stem) + ".xml") tree.write(os.path.join(self.dir_out, self.image_filename_stem) + ".xml")
def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals): def build_pagexml_no_full_layout(self, found_polygons_text_region, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_box_coord, found_polygons_text_region_img, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes):
self.logger.debug('enter build_pagexml_no_full_layout') self.logger.debug('enter build_pagexml_no_full_layout')
# create the file structure # create the file structure
@ -1228,7 +1224,7 @@ class eynollah:
id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals) id_indexer = len(found_polygons_text_region) + len(found_polygons_marginals)
for mm in range(len(found_polygons_text_region_img)): for mm in range(len(found_polygons_text_region_img)):
textregion=ET.SubElement(page, 'ImageRegion') textregion = ET.SubElement(page, 'ImageRegion')
textregion.set('id', 'r%s' % id_indexer) textregion.set('id', 'r%s' % id_indexer)
id_indexer += 1 id_indexer += 1
coord_text = ET.SubElement(textregion, 'Coords') coord_text = ET.SubElement(textregion, 'Coords')
@ -1243,7 +1239,7 @@ class eynollah:
return pcgts return pcgts
def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes, slopes_marginals): def build_pagexml_full_layout(self, found_polygons_text_region, found_polygons_text_region_h, page_coord, order_of_texts, id_of_texts, all_found_texline_polygons, all_found_texline_polygons_h, all_box_coord, all_box_coord_h, found_polygons_text_region_img, found_polygons_tables, found_polygons_drop_capitals, found_polygons_marginals, all_found_texline_polygons_marginals, all_box_coord_marginals, slopes):
self.logger.debug('enter build_pagexml_full_layout') self.logger.debug('enter build_pagexml_full_layout')
# create the file structure # create the file structure
@ -1375,12 +1371,12 @@ class eynollah:
prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3 prediction_regions_org[(mask_lines2[:,:]==1) & (prediction_regions_org[:,:]==0)]=3
mask_lines_only=(prediction_regions_org[:,:]==3)*1 mask_lines_only=(prediction_regions_org[:,:]==3)*1
prediction_regions_org = cv2.erode(prediction_regions_org[:,:], self.kernel, iterations=2) prediction_regions_org = cv2.erode(prediction_regions_org[:,:], KERNEL, iterations=2)
#plt.imshow(text_region2_1st_channel) #plt.imshow(text_region2_1st_channel)
#plt.show() #plt.show()
prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], self.kernel, iterations=2) prediction_regions_org = cv2.dilate(prediction_regions_org[:,:], KERNEL, iterations=2)
mask_texts_only=(prediction_regions_org[:,:]==1)*1 mask_texts_only=(prediction_regions_org[:,:]==1)*1
mask_images_only=(prediction_regions_org[:,:]==2)*1 mask_images_only=(prediction_regions_org[:,:]==2)*1
@ -1680,14 +1676,14 @@ class eynollah:
mask_images = (text_regions_p_1[:, :] == 2) * 1 mask_images = (text_regions_p_1[:, :] == 2) * 1
mask_images = mask_images.astype(np.uint8) mask_images = mask_images.astype(np.uint8)
mask_images = cv2.erode(mask_images[:, :], self.kernel, iterations=10) mask_images = cv2.erode(mask_images[:, :], KERNEL, iterations=10)
mask_lines = (text_regions_p_1[:, :] == 3) * 1 mask_lines = (text_regions_p_1[:, :] == 3) * 1
mask_lines = mask_lines.astype(np.uint8) mask_lines = mask_lines.astype(np.uint8)
img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1 img_only_regions_with_sep = ((text_regions_p_1[:, :] != 3) & (text_regions_p_1[:, :] != 0)) * 1
img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8) img_only_regions_with_sep = img_only_regions_with_sep.astype(np.uint8)
img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], self.kernel, iterations=6) img_only_regions = cv2.erode(img_only_regions_with_sep[:, :], KERNEL, iterations=6)
try: try:
num_col, peaks_neg_fin = find_num_col(img_only_regions, multiplier=6.0) num_col, peaks_neg_fin = find_num_col(img_only_regions, multiplier=6.0)
@ -1739,7 +1735,7 @@ class eynollah:
def run_deskew(self, textline_mask_tot_ea): def run_deskew(self, textline_mask_tot_ea):
sigma = 2 sigma = 2
main_page_deskew = True main_page_deskew = True
slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, self.kernel, iterations=2), sigma, main_page_deskew, plotter=self.plotter) slope_deskew = return_deskew_slop(cv2.erode(textline_mask_tot_ea, KERNEL, iterations=2), sigma, main_page_deskew, plotter=self.plotter)
slope_first = 0 slope_first = 0
if self.plotter: if self.plotter:
@ -1763,7 +1759,7 @@ class eynollah:
try: try:
regions_without_seperators = (text_regions_p[:, :] == 1) * 1 regions_without_seperators = (text_regions_p[:, :] == 1) * 1
regions_without_seperators = regions_without_seperators.astype(np.uint8) regions_without_seperators = regions_without_seperators.astype(np.uint8)
text_regions_p = get_marginals(rotate_image(regions_without_seperators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=self.kernel) text_regions_p = get_marginals(rotate_image(regions_without_seperators, slope_deskew), text_regions_p, num_col_classifier, slope_deskew, kernel=KERNEL)
except Exception as e: except Exception as e:
self.logger.error("exception %s", e) self.logger.error("exception %s", e)
pass pass
@ -1798,14 +1794,14 @@ class eynollah:
if num_col_classifier >= 3: if num_col_classifier >= 3:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
regions_without_seperators = regions_without_seperators.astype(np.uint8) regions_without_seperators = regions_without_seperators.astype(np.uint8)
regions_without_seperators = cv2.erode(regions_without_seperators[:, :], self.kernel, iterations=6) regions_without_seperators = cv2.erode(regions_without_seperators[:, :], KERNEL, iterations=6)
#random_pixels_for_image = np.random.randn(regions_without_seperators.shape[0], regions_without_seperators.shape[1]) #random_pixels_for_image = np.random.randn(regions_without_seperators.shape[0], regions_without_seperators.shape[1])
#random_pixels_for_image[random_pixels_for_image < -0.5] = 0 #random_pixels_for_image[random_pixels_for_image < -0.5] = 0
#random_pixels_for_image[random_pixels_for_image != 0] = 1 #random_pixels_for_image[random_pixels_for_image != 0] = 1
#regions_without_seperators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 2)] = 1 #regions_without_seperators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 2)] = 1
else: else:
regions_without_seperators_d = regions_without_seperators_d.astype(np.uint8) regions_without_seperators_d = regions_without_seperators_d.astype(np.uint8)
regions_without_seperators_d = cv2.erode(regions_without_seperators_d[:, :], self.kernel, iterations=6) regions_without_seperators_d = cv2.erode(regions_without_seperators_d[:, :], KERNEL, iterations=6)
#random_pixels_for_image = np.random.randn(regions_without_seperators_d.shape[0], regions_without_seperators_d.shape[1]) #random_pixels_for_image = np.random.randn(regions_without_seperators_d.shape[0], regions_without_seperators_d.shape[1])
#random_pixels_for_image[random_pixels_for_image < -0.5] = 0 #random_pixels_for_image[random_pixels_for_image < -0.5] = 0
#random_pixels_for_image[random_pixels_for_image != 0] = 1 #random_pixels_for_image[random_pixels_for_image != 0] = 1
@ -2065,9 +2061,9 @@ class eynollah:
else: else:
scale_param = 1 scale_param = 1
all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=self.kernel, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew) all_found_texline_polygons, boxes_text, txt_con_org, contours_only_text_parent, all_box_coord, index_by_text_par_con, slopes = self.get_slopes_and_deskew_new_curved(txt_con_org, contours_only_text_parent, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_text, text_only, num_col_classifier, scale_param, slope_deskew)
all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier) all_found_texline_polygons = small_textlines_to_parent_adherence2(all_found_texline_polygons, textline_mask_tot_ea, num_col_classifier)
all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, index_by_text_par_con_marginal, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=self.kernel, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew) all_found_texline_polygons_marginals, boxes_marginals, _, polygons_of_marginals, all_box_coord_marginals, index_by_text_par_con_marginal, slopes_marginals = self.get_slopes_and_deskew_new_curved(polygons_of_marginals, polygons_of_marginals, cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=1), image_page_rotated, boxes_marginals, text_only, num_col_classifier, scale_param, slope_deskew)
all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier) all_found_texline_polygons_marginals = small_textlines_to_parent_adherence2(all_found_texline_polygons_marginals, textline_mask_tot_ea, num_col_classifier)
index_of_vertical_text_contours = np.array(range(len(slopes)))[(abs(np.array(slopes)) > 60)] index_of_vertical_text_contours = np.array(range(len(slopes)))[(abs(np.array(slopes)) > 60)]
@ -2091,7 +2087,7 @@ class eynollah:
polygons_of_tabels = [] polygons_of_tabels = []
pixel_img = 4 pixel_img = 4
polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img) polygons_of_drop_capitals = return_contours_of_interested_region_by_min_size(text_regions_p, pixel_img)
all_found_texline_polygons = adhere_drop_capital_region_into_cprresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, kernel=self.kernel, curved_line=self.curved_line) all_found_texline_polygons = adhere_drop_capital_region_into_cprresponding_textline(text_regions_p, polygons_of_drop_capitals, contours_only_text_parent, contours_only_text_parent_h, all_box_coord, all_box_coord_h, all_found_texline_polygons, all_found_texline_polygons_h, kernel=KERNEL, curved_line=self.curved_line)
# print(len(contours_only_text_parent_h),len(contours_only_text_parent_h_d_ordered),'contours_only_text_parent_h') # print(len(contours_only_text_parent_h),len(contours_only_text_parent_h_d_ordered),'contours_only_text_parent_h')
pixel_lines = 6 pixel_lines = 6
@ -2114,14 +2110,14 @@ class eynollah:
if num_col_classifier >= 3: if num_col_classifier >= 3:
if np.abs(slope_deskew) < SLOPE_THRESHOLD: if np.abs(slope_deskew) < SLOPE_THRESHOLD:
regions_without_seperators = regions_without_seperators.astype(np.uint8) regions_without_seperators = regions_without_seperators.astype(np.uint8)
regions_without_seperators = cv2.erode(regions_without_seperators[:, :], self.kernel, iterations=6) regions_without_seperators = cv2.erode(regions_without_seperators[:, :], KERNEL, iterations=6)
random_pixels_for_image = np.random.randn(regions_without_seperators.shape[0], regions_without_seperators.shape[1]) random_pixels_for_image = np.random.randn(regions_without_seperators.shape[0], regions_without_seperators.shape[1])
random_pixels_for_image[random_pixels_for_image < -0.5] = 0 random_pixels_for_image[random_pixels_for_image < -0.5] = 0
random_pixels_for_image[random_pixels_for_image != 0] = 1 random_pixels_for_image[random_pixels_for_image != 0] = 1
regions_without_seperators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 5)] = 1 regions_without_seperators[(random_pixels_for_image[:, :] == 1) & (text_regions_p[:, :] == 5)] = 1
else: else:
regions_without_seperators_d = regions_without_seperators_d.astype(np.uint8) regions_without_seperators_d = regions_without_seperators_d.astype(np.uint8)
regions_without_seperators_d = cv2.erode(regions_without_seperators_d[:, :], self.kernel, iterations=6) regions_without_seperators_d = cv2.erode(regions_without_seperators_d[:, :], KERNEL, iterations=6)
random_pixels_for_image = np.random.randn(regions_without_seperators_d.shape[0], regions_without_seperators_d.shape[1]) random_pixels_for_image = np.random.randn(regions_without_seperators_d.shape[0], regions_without_seperators_d.shape[1])
random_pixels_for_image[random_pixels_for_image < -0.5] = 0 random_pixels_for_image[random_pixels_for_image < -0.5] = 0
random_pixels_for_image[random_pixels_for_image != 0] = 1 random_pixels_for_image[random_pixels_for_image != 0] = 1

Loading…
Cancel
Save