mirror of
				https://github.com/qurator-spk/eynollah.git
				synced 2025-11-03 19:24:13 +01:00 
			
		
		
		
	remove unused delete_seperator_around
This commit is contained in:
		
							parent
							
								
									133982380f
								
							
						
					
					
						commit
						c80fddb3b8
					
				
					 3 changed files with 24 additions and 28 deletions
				
			
		| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring
 | 
					# pylint: disable=no-member,invalid-name,line-too-long,missing-function-docstring
 | 
				
			||||||
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines
 | 
					# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements
 | 
				
			||||||
# pylint: disable=too-many-public-methods
 | 
					# pylint: disable=too-many-public-methods,too-many-arguments,too-many-instance-attributes,too-many-public-methods,
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
tool to extract table form data from alto xml data
 | 
					tool to extract table form data from alto xml data
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
| 
						 | 
					@ -67,8 +67,6 @@ from .utils import (
 | 
				
			||||||
    crop_image_inside_box,
 | 
					    crop_image_inside_box,
 | 
				
			||||||
    find_num_col,
 | 
					    find_num_col,
 | 
				
			||||||
    otsu_copy_binary,
 | 
					    otsu_copy_binary,
 | 
				
			||||||
    delete_seperator_around,
 | 
					 | 
				
			||||||
    return_regions_without_seperators,
 | 
					 | 
				
			||||||
    put_drop_out_from_only_drop_model,
 | 
					    put_drop_out_from_only_drop_model,
 | 
				
			||||||
    putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
 | 
					    putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
 | 
				
			||||||
    check_any_text_region_in_model_one_is_main_or_header,
 | 
					    check_any_text_region_in_model_one_is_main_or_header,
 | 
				
			||||||
| 
						 | 
					@ -107,7 +105,7 @@ class eynollah:
 | 
				
			||||||
        allow_scaling=False,
 | 
					        allow_scaling=False,
 | 
				
			||||||
        headers_off=False
 | 
					        headers_off=False
 | 
				
			||||||
    ):
 | 
					    ):
 | 
				
			||||||
        self.image_filename = image_filename  # XXX This does not seem to be a directory as the name suggests, but a file
 | 
					        self.image_filename = image_filename
 | 
				
			||||||
        self.cont_page = []
 | 
					        self.cont_page = []
 | 
				
			||||||
        self.dir_out = dir_out
 | 
					        self.dir_out = dir_out
 | 
				
			||||||
        self.image_filename_stem = image_filename_stem
 | 
					        self.image_filename_stem = image_filename_stem
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3344,4 +3344,24 @@ def implent_law_head_main_not_parallel(text_regions):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return text_regions
 | 
					    return text_regions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def delete_seperator_around(spliter_y, peaks_neg, image_by_region):
					    """Clear label values 6 and 7 in narrow vertical bands around peaks.

					    ``spliter_y`` holds consecutive row positions; each adjacent pair
					    bounds one horizontal strip of ``image_by_region``.  ``peaks_neg[i]``
					    holds column positions for strip ``i``.  For every peak except the
					    first and the last of a strip, a band reaching 5 % of the peak's
					    column value to either side is inspected and pixels carrying the
					    value 6 or 7 are set to 0.
					    NOTE(review): 6 and 7 are presumably the separator classes, going by
					    the function name -- confirm against the label map.

					    For a 3-D array the first three channels are inspected, but only
					    channel 0 is overwritten.  For a 2-D array the matching pixels
					    themselves are overwritten.

					    The array is modified in place and also returned.

					    Row bounds and peak columns are cast to ``int`` once, so float-valued
					    positions are accepted in both branches (previously two of the
					    3-channel assignments sliced with the raw values and raised
					    ``TypeError`` for non-integer positions).
					    """
					    is_multichannel = len(image_by_region.shape) == 3
					    for i in range(len(spliter_y) - 1):
					        top = int(spliter_y[i])
					        bottom = int(spliter_y[i + 1])
					        # The outermost peaks of each strip are deliberately left alone.
					        for raw_peak in peaks_neg[i][1:-1]:
					            peak = int(raw_peak)
					            half_width = int(1.0 / 20.0 * peak)
					            # Basic slicing yields a view, so writes land in image_by_region.
					            band = image_by_region[top:bottom, peak - half_width : peak + half_width]
					            hits = (band == 6) | (band == 7)
					            if is_multichannel:
					                # A hit in any of the first three channels clears channel 0 only.
					                band[..., 0][hits[..., :3].any(axis=2)] = 0
					            else:
					                band[hits] = 0
					    return image_by_region
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -718,28 +718,6 @@ def find_num_col_by_vertical_lines(regions_without_seperators, multiplier=3.8):
 | 
				
			||||||
    # print(peaks,'peaksnew')
 | 
					    # print(peaks,'peaksnew')
 | 
				
			||||||
    return peaks
 | 
					    return peaks
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
def delete_seperator_around(spliter_y, peaks_neg, image_by_region):
    """Clear label values 6 and 7 in narrow vertical bands around peaks.

    ``spliter_y`` holds consecutive row positions; each adjacent pair
    bounds one horizontal strip of ``image_by_region``.  ``peaks_neg[i]``
    holds column positions for strip ``i``.  For every peak except the
    first and the last of a strip, a band reaching 5 % of the peak's
    column value to either side is inspected and pixels carrying the
    value 6 or 7 are set to 0.
    NOTE(review): 6 and 7 are presumably the separator classes, going by
    the function name -- confirm against the label map.

    For a 3-D array the first three channels are inspected, but only
    channel 0 is overwritten.  For a 2-D array the matching pixels
    themselves are overwritten.

    The array is modified in place and also returned.

    Row bounds and peak columns are cast to ``int`` once, so float-valued
    positions are accepted in both branches (previously two of the
    3-channel assignments sliced with the raw values and raised
    ``TypeError`` for non-integer positions).
    """
    is_multichannel = len(image_by_region.shape) == 3
    for i in range(len(spliter_y) - 1):
        top = int(spliter_y[i])
        bottom = int(spliter_y[i + 1])
        # The outermost peaks of each strip are deliberately left alone.
        for raw_peak in peaks_neg[i][1:-1]:
            peak = int(raw_peak)
            half_width = int(1.0 / 20.0 * peak)
            # Basic slicing yields a view, so writes land in image_by_region.
            band = image_by_region[top:bottom, peak - half_width : peak + half_width]
            hits = (band == 6) | (band == 7)
            if is_multichannel:
                # A hit in any of the first three channels clears channel 0 only.
                band[..., 0][hits[..., :3].any(axis=2)] = 0
            else:
                band[hits] = 0
    return image_by_region
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def return_regions_without_seperators(regions_pre):
 | 
					def return_regions_without_seperators(regions_pre):
 | 
				
			||||||
    kernel = np.ones((5, 5), np.uint8)
 | 
					    kernel = np.ones((5, 5), np.uint8)
 | 
				
			||||||
    regions_without_seperators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1
 | 
					    regions_without_seperators = ((regions_pre[:, :] != 6) & (regions_pre[:, :] != 0)) * 1
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue