mirror of https://github.com/qurator-spk/eynollah.git
synced 2025-08-29 03:49:54 +02:00

updating ocr

This commit is contained in:
parent 0f154c605a
commit b93fc112bf

3 changed files with 58 additions and 31 deletions
File 1 of 3 — CLI (`ocr` command):

@@ -337,6 +337,12 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="image filename",
     type=click.Path(exists=True, dir_okay=False),
 )
+@click.option(
+    "--overwrite",
+    "-O",
+    help="overwrite (instead of skipping) if output xml exists",
+    is_flag=True,
+)
 @click.option(
     "--dir_in",
     "-di",
@@ -421,7 +427,7 @@ def layout(image, out, overwrite, dir_in, model, save_images, save_layout, save_
     help="Override log level globally to this",
 )
-def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
+def ocr(image, overwrite, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_ocr, export_textline_images_and_text, do_not_mask_with_textline_contour, draw_texts_on_image, prediction_with_both_of_rgb_and_bin, batch_size, dataset_abbrevation, log_level):
     initLogging()
     if log_level:
         getLogger('eynollah').setLevel(getLevelName(log_level))
@@ -449,7 +455,7 @@ def ocr(image, dir_in, dir_in_bin, out, dir_xmls, dir_out_image_text, model, tr_
         batch_size=batch_size,
         pref_of_dataset=dataset_abbrevation,
     )
-    eynollah_ocr.run()
+    eynollah_ocr.run(overwrite=overwrite)

 if __name__ == "__main__":
     main()
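Note on the pattern above: `is_flag=True` makes click treat `--overwrite`/`-O` as a boolean that defaults to False, and the command simply threads the value through to `Eynollah_ocr.run()`. A minimal, self-contained sketch of the same flag wiring (the command name and body here are illustrative, not eynollah code):

import click

@click.command()
@click.option(
    "--overwrite",
    "-O",
    help="overwrite (instead of skipping) if output xml exists",
    is_flag=True,
)
def demo(overwrite):
    # click passes overwrite=False unless -O/--overwrite is given on the command line
    click.echo(f"overwrite={overwrite}")

if __name__ == "__main__":
    demo()

Invoked as `demo -O` this prints overwrite=True; without the flag it prints overwrite=False.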
File 2 of 3 — `Eynollah_ocr` class:

@@ -5338,6 +5338,8 @@ class Eynollah_ocr:
         self.dir_out_image_text = dir_out_image_text
         self.prediction_with_both_of_rgb_and_bin = prediction_with_both_of_rgb_and_bin
         self.pref_of_dataset = pref_of_dataset
+        self.logger = logger if logger else getLogger('eynollah')
+
         if not export_textline_images_and_text:
             if tr_ocr:
                 self.processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
@@ -5351,7 +5353,7 @@ class Eynollah_ocr:
                 self.b_s = int(batch_size)

             else:
-                self.model_ocr_dir = dir_models + "/model_step_750000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
+                self.model_ocr_dir = dir_models + "/model_step_1075000_ocr"#"/model_step_125000_ocr"#"/model_step_25000_ocr"#"/model_step_1050000_ocr"#"/model_0_ocr_cnnrnn"#"/model_23_ocr_cnnrnn"
                 model_ocr = load_model(self.model_ocr_dir , compile=False)

                 self.prediction_model = tf.keras.models.Model(
@@ -5377,7 +5379,7 @@ class Eynollah_ocr:
                     vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
                 )

-    def run(self):
+    def run(self, overwrite : bool = False):
         if self.dir_in:
             ls_imgs = os.listdir(self.dir_in)
         else:
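The checkpoint swap above only changes which training step is loaded; the surrounding (unchanged) code then cuts the loaded CNN-RNN model down to an inference head. A sketch of that pattern, under stated assumptions: the path is illustrative, and the layer names "image" and "dense2" follow the common Keras CTC-OCR tutorial convention rather than being confirmed by this diff:

import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the checkpoint without compiling: optimizer state is not needed
# for inference. The path and layer names below are assumptions.
model_ocr = load_model("models_eynollah/model_step_1075000_ocr", compile=False)

# Keep only the forward path from the image input to the per-timestep
# character softmax, dropping any training-only (e.g. CTC loss) layers.
prediction_model = tf.keras.models.Model(
    model_ocr.get_layer(name="image").input,
    model_ocr.get_layer(name="dense2").output,
)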
@@ -5394,6 +5396,14 @@ class Eynollah_ocr:
                 dir_img = self.image_filename
             dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
+            out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+
+            if os.path.exists(out_file_ocr):
+                if overwrite:
+                    self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                else:
+                    self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                    continue

             img = cv2.imread(dir_img)

             if self.draw_texts_on_image:
@@ -5574,6 +5584,14 @@ class Eynollah_ocr:
                 #dir_img = os.path.join(self.dir_in, ind_img)
                 dir_xml = os.path.join(self.dir_xmls, file_name+'.xml')
+                out_file_ocr = os.path.join(self.dir_out, file_name+'.xml')
+
+                if os.path.exists(out_file_ocr):
+                    if overwrite:
+                        self.logger.warning("will overwrite existing output file '%s'", out_file_ocr)
+                    else:
+                        self.logger.warning("will skip input for existing output file '%s'", out_file_ocr)
+                        continue

                 img = cv2.imread(dir_img)
                 if self.prediction_with_both_of_rgb_and_bin:
                     cropped_lines_bin = []
@@ -5704,7 +5722,7 @@ class Eynollah_ocr:
                             cropped_lines_bin.append(img_fin)
                     else:
                         if self.prediction_with_both_of_rgb_and_bin:
-                            splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin)
+                            splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, img_crop_bin, prediction_with_both_of_rgb_and_bin=self.prediction_with_both_of_rgb_and_bin)
                         else:
                             splited_images, splited_images_bin = return_textlines_split_if_needed(img_crop, None)
                         if splited_images:
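The two hunks above add the same guard in both OCR code paths, and it is the behavioral core of the commit: an existing output XML is only replaced when the new flag is set, otherwise the input is skipped. Factored out as a standalone helper for illustration (the helper name is ours, not eynollah's):

import os
import logging

logger = logging.getLogger('eynollah')

def should_process(out_file_ocr: str, overwrite: bool = False) -> bool:
    # Mirrors the guard added in run(): an existing output file is only
    # overwritten on request; otherwise the corresponding input is skipped.
    if os.path.exists(out_file_ocr):
        if overwrite:
            logger.warning("will overwrite existing output file '%s'", out_file_ocr)
        else:
            logger.warning("will skip input for existing output file '%s'", out_file_ocr)
            return False
    return True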
File 3 of 3 — OCR utilities:

@@ -74,32 +74,24 @@ def distortion_free_resize(image, img_size):
 def return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image):
     width = np.shape(textline_image)[1]
     height = np.shape(textline_image)[0]
-    common_window = int(0.22*width)
+    common_window = int(0.06*width)

     width1 = int ( width/2. - common_window )
     width2 = int ( width/2. + common_window )

     img_sum = np.sum(textline_image[:,:,0], axis=0)
     sum_smoothed = gaussian_filter1d(img_sum, 3)

     peaks_real, _ = find_peaks(sum_smoothed, height=0)

-    if len(peaks_real)>35:
-
-        #peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
-        argsort = np.argsort(sum_smoothed[peaks_real])[::-1]
-        peaks_real_top_six = peaks_real[argsort[:6]]
-        midpoint = textline_image.shape[1] / 2.
-        arg_closest = np.argmin(np.abs(peaks_real_top_six - midpoint))
-
-        #arg_max = np.argmax(sum_smoothed[peaks_real])
-
-        peaks_final = peaks_real_top_six[arg_closest]#peaks_real[arg_max]
-
+    if len(peaks_real)>70:
+        peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]
+        arg_max = np.argmax(sum_smoothed[peaks_real])
+        peaks_final = peaks_real[arg_max]
         return peaks_final
     else:
         return None

 # Function to fit text inside the given area
 def fit_text_single_line(draw, text, font_path, max_width, max_height):
     initial_font_size = 50
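After this change the function projects the text line onto a 1-D column-sum profile, smooths it, and — only when the profile has more than 70 peaks, i.e. the line is visibly busy — returns the strongest peak inside a narrow central window (±6% of the width, down from ±22%) as the column to split at. A self-contained sketch of that logic; note the empty-window guard near the end is ours and is not present in the original:

import numpy as np
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks

def central_split_column(textline_image: np.ndarray):
    width = textline_image.shape[1]
    common_window = int(0.06 * width)        # +/- 6% of width around the centre
    width1 = int(width / 2. - common_window)
    width2 = int(width / 2. + common_window)

    img_sum = np.sum(textline_image[:, :, 0], axis=0)  # column-wise ink profile
    sum_smoothed = gaussian_filter1d(img_sum, 3)
    peaks_real, _ = find_peaks(sum_smoothed, height=0)

    if len(peaks_real) > 70:                 # only split visibly busy lines
        central = peaks_real[(peaks_real < width2) & (peaks_real > width1)]
        if len(central) == 0:                # guard not in the original code
            return None
        return central[np.argmax(sum_smoothed[central])]
    return None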
@@ -305,17 +297,28 @@ def break_curved_line_into_small_pieces_and_then_merge(img_curved, mask_curved,
             #new bounding box
             x_n, y_n, w_n, h_n = get_contours_and_bounding_boxes(mask_in_des[:,:,0])

-            mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-            img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-            if img_bin_curved:
-                img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
-
-            w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
-            if w_relative==0:
-                w_relative = img_in_des.shape[1]
-            img_in_des = resize_image(img_in_des, 32, w_relative)
-            if img_bin_curved:
-                img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+            if w_n==0 or h_n==0:
+                img_in_des = np.copy(img_in)
+                if img_bin_curved:
+                    img_bin_in_des = np.copy(img_bin_in)
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                if img_bin_curved:
+                    img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)
+            else:
+                mask_in_des = mask_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                img_in_des = img_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+                if img_bin_curved:
+                    img_bin_in_des = img_bin_in_des[y_n:y_n+h_n, x_n:x_n+w_n, :]
+
+                w_relative = int(32 * img_in_des.shape[1]/float(img_in_des.shape[0]) )
+                if w_relative==0:
+                    w_relative = img_in_des.shape[1]
+                img_in_des = resize_image(img_in_des, 32, w_relative)
+                if img_bin_curved:
+                    img_bin_in_des = resize_image(img_bin_in_des, 32, w_relative)


         else:
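This last hunk wraps the crop-and-resize step in a guard: when get_contours_and_bounding_boxes returns a degenerate box (w_n or h_n is 0), cropping would yield an empty array, so the code now falls back to the uncropped piece before height-normalising to 32 px. A compact sketch of the same guard (names are illustrative, and cv2.resize stands in for eynollah's resize_image(img, height, width) helper):

import cv2
import numpy as np

def crop_and_normalise(piece: np.ndarray, x: int, y: int, w: int, h: int,
                       target_h: int = 32) -> np.ndarray:
    if w == 0 or h == 0:
        cropped = np.copy(piece)      # degenerate box: keep the whole piece
    else:
        cropped = piece[y:y+h, x:x+w, :]
    # height-normalise to target_h while preserving the aspect ratio
    w_rel = int(target_h * cropped.shape[1] / float(cropped.shape[0]))
    if w_rel == 0:                    # extremely tall-and-narrow crop
        w_rel = cropped.shape[1]
    return cv2.resize(cropped, (w_rel, target_h))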