mirror of
https://github.com/qurator-spk/eynollah.git
🔥 remove OCR option from eynollah layout
parent 0f410c2e7c
commit 5a1900e664
5 changed files with 16 additions and 360 deletions
@@ -120,9 +120,6 @@ The following options can be used to further configure the processing:
| `-sa <directory>` | save all (plot, enhanced/binary image, layout) to this directory |
| `-thart` | threshold of artificial class in the case of textline detection. The default value is 0.1 |
| `-tharl` | threshold of artificial class in the case of layout detection. The default value is 0.1 |
| `-ocr` | do ocr |
| `-tr` | apply transformer ocr. Default model is a CNN-RNN model |
| `-bs_ocr` | ocr inference batch size. Default bs for trocr and cnn_rnn models are 2 and 8 respectively |
| `-ncu` | upper limit of columns in document image |
| `-ncl` | lower limit of columns in document image |
| `-slro` | skip layout detection and reading order |
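The table above documents command-line flags of the `layout` command; the `-ocr`, `-tr` and `-bs_ocr` rows are the ones this commit removes. A rough, hypothetical sketch of exercising those flags programmatically follows; the import path `eynollah.cli.layout` and the base options `-i`/`-o`/`-m` are assumptions, not part of this diff:

```python
# Hypothetical sketch, not part of this commit: drive the click `layout`
# command with the OCR flags documented above via click's test runner.
from click.testing import CliRunner

from eynollah.cli import layout  # assumed import path

runner = CliRunner()
result = runner.invoke(layout, [
    "-i", "page.tif",    # input image (assumed base option)
    "-o", "out/",        # output directory (assumed base option)
    "-m", "models/",     # model directory (assumed base option)
    "-ocr",              # do OCR (flag removed by this commit)
    "-tr",               # use the transformer OCR model
    "-bs_ocr", "2",      # OCR batch size; the table lists 2 as the trocr default
])
print(result.exit_code, result.output)
```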
@@ -321,7 +321,7 @@ def enhancement(ctx, image, out, overwrite, dir_in, num_col_upper, num_col_lower
"--input_binary/--input-RGB",
"-ib/-irgb",
is_flag=True,
help="in general, eynollah uses RGB as input but if the input document is strongly dark, bright or for any other reason you can turn binarized input on. This option does not mean that you have to provide a binary image, otherwise this means that the tool itself will binarized the RGB input document.",
help="In general, eynollah uses RGB as input but if the input document is very dark, very bright or for any other reason you can turn on input binarization. When this flag is set, eynollah will binarize the RGB input document, you should always provide RGB images to eynollah.",
)
@click.option(
"--allow_scaling/--no-allow-scaling",
@@ -353,23 +353,6 @@ def enhancement(ctx, image, out, overwrite, dir_in, num_col_upper, num_col_lower
is_flag=True,
help="if this parameter set to true, this tool would apply machine based reading order detection",
)
@click.option(
"--do_ocr",
"-ocr/-noocr",
is_flag=True,
help="if this parameter set to true, this tool will try to do ocr",
)
@click.option(
"--transformer_ocr",
"-tr/-notr",
is_flag=True,
help="if this parameter set to true, this tool will apply transformer ocr",
)
@click.option(
"--batch_size_ocr",
"-bs_ocr",
help="number of inference batch size of ocr model. Default b_s for trocr and cnn_rnn models are 2 and 8 respectively",
)
@click.option(
"--num_col_upper",
"-ncu",
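The `--do_ocr` (`-ocr/-noocr`) and `--transformer_ocr` (`-tr/-notr`) declarations removed above use click's paired-switch syntax: the slash-separated secondary name defines an enable and a disable switch for a single boolean parameter. A minimal self-contained sketch of that pattern (demo command only, not eynollah code):

```python
# Minimal sketch of the paired on/off flag pattern used above: "-ocr" turns
# the boolean on, "-noocr" turns it off; the value lands in `do_ocr`.
import click

@click.command()
@click.option("--do_ocr", "-ocr/-noocr", is_flag=True,
              help="if set, try to do OCR (demo only)")
def demo(do_ocr):
    click.echo(f"do_ocr={do_ocr}")

if __name__ == "__main__":
    demo()  # e.g. `python demo.py -ocr` prints do_ocr=True
```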
@@ -421,9 +404,6 @@ def layout(
headers_off,
light_version,
reading_order_machine_based,
do_ocr,
transformer_ocr,
batch_size_ocr,
num_col_upper,
num_col_lower,
threshold_art_class_textline,
@@ -470,9 +450,6 @@ def layout(
light_version=light_version,
ignore_page_extraction=ignore_page_extraction,
reading_order_machine_based=reading_order_machine_based,
do_ocr=do_ocr,
transformer_ocr=transformer_ocr,
batch_size_ocr=batch_size_ocr,
num_col_upper=num_col_upper,
num_col_lower=num_col_lower,
skip_layout_and_reading_order=skip_layout_and_reading_order,
@@ -506,7 +483,15 @@ def layout(
@click.option(
"--dir_in_bin",
"-dib",
help="directory of binarized images (in addition to --dir_in for RGB images; filename stems must match the RGB image files, with '.png' suffix).\nPerform prediction using both RGB and binary images. (This does not necessarily improve results, however it may be beneficial for certain document images.)",
help=("""
directory of binarized images (in addition to --dir_in for RGB
images; filename stems must match the RGB image files, with '.png'
\n
Perform prediction using both RGB and binary images.
(This does not necessarily improve results, however it may be beneficial
for certain document images.
"""),

type=click.Path(exists=True, file_okay=False),
)
@click.option(
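The reworded `--dir_in_bin` help above requires the binarized images to share the RGB files' filename stems with a '.png' suffix. A small sketch of deriving that companion path (a hypothetical helper, mirroring the `os.path.join(dir_in_bin, file_name+'.png')` call that appears later in this diff):

```python
# Sketch (assumed helper, not part of the eynollah API): derive the companion
# binarized image path for an RGB input, following the --dir_in_bin help text:
# same filename stem, '.png' suffix, located under dir_in_bin.
import os

def matching_bin_path(rgb_path: str, dir_in_bin: str) -> str:
    file_name = os.path.splitext(os.path.basename(rgb_path))[0]
    return os.path.join(dir_in_bin, file_name + '.png')

print(matching_bin_path("scans/rgb/page_0001.tif", "scans/bin"))
# -> scans/bin/page_0001.png
```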
@@ -95,19 +95,6 @@ from .utils.rotate import (
rotation_not_90_func_full_layout,
rotation_image_new
)
from .utils.utils_ocr import (
return_start_and_end_of_common_text_of_textline_ocr_without_common_section,
return_textline_contour_with_added_box_coordinate,
preprocess_and_resize_image_for_ocrcnn_model,
return_textlines_split_if_needed,
decode_batch_predictions,
return_rnn_cnn_ocr_of_given_textlines,
fit_text_single_line,
break_curved_line_into_small_pieces_and_then_merge,
get_orientation_moments,
rotate_image_with_padding,
get_contours_and_bounding_boxes
)
from .utils.separate_lines import (
separate_lines_new2,
return_deskew_slop,
@@ -176,9 +163,6 @@ class Eynollah:
light_version : bool = False,
ignore_page_extraction : bool = False,
reading_order_machine_based : bool = False,
do_ocr : bool = False,
transformer_ocr: bool = False,
batch_size_ocr: Optional[int] = None,
num_col_upper : Optional[int] = None,
num_col_lower : Optional[int] = None,
threshold_art_class_layout: Optional[float] = None,
@@ -209,12 +193,6 @@ class Eynollah:
self.extract_only_images = extract_only_images
self.ignore_page_extraction = ignore_page_extraction
self.skip_layout_and_reading_order = skip_layout_and_reading_order
self.ocr = do_ocr
self.tr = transformer_ocr
if not batch_size_ocr:
self.b_s_ocr = 8
else:
self.b_s_ocr = int(batch_size_ocr)
if num_col_upper:
self.num_col_upper = int(num_col_upper)
else:
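The removed constructor lines above only fall back to a batch size of 8 when `--batch_size_ocr` is not given, while the CLI help elsewhere in this diff documents defaults of 2 for the trocr model and 8 for the cnn_rnn model. A hypothetical helper (not eynollah code) that would make both documented defaults explicit might look like:

```python
# Hypothetical helper based on the documented defaults (2 for transformer OCR,
# 8 for the CNN-RNN model); the actual constructor above only applies the 8.
from typing import Optional

def resolve_ocr_batch_size(batch_size_ocr: Optional[int], transformer_ocr: bool) -> int:
    if not batch_size_ocr:
        return 2 if transformer_ocr else 8
    return int(batch_size_ocr)  # the CLI may pass a raw string, hence the cast

assert resolve_ocr_batch_size(None, transformer_ocr=True) == 2
assert resolve_ocr_batch_size(None, transformer_ocr=False) == 8
assert resolve_ocr_batch_size(4, transformer_ocr=False) == 4
```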
@@ -284,14 +262,6 @@ class Eynollah:
if self.tables:
loadable.append(("table", 'light' if self.light_version else ''))

if self.ocr:
if self.tr:
loadable.append(('ocr', 'tr'))
loadable.append(('trocr_processor', ''))
else:
loadable.append('ocr')
loadable.append('num_to_char')

self.model_zoo.load_models(*loadable)

def __del__(self):
@@ -2078,15 +2048,7 @@ class Eynollah:
###img_bin = np.copy(prediction_bin)
###else:
###img_bin = np.copy(img_resized)
if (self.ocr and self.tr) and not self.input_binary:
prediction_bin = self.do_prediction(True, img_resized, self.model_zoo.get("binarization"), n_batch_inference=5)
prediction_bin = 255 * (prediction_bin[:,:,0] == 0)
prediction_bin = np.repeat(prediction_bin[:, :, np.newaxis], 3, axis=2)
prediction_bin = prediction_bin.astype(np.uint16)
#img= np.copy(prediction_bin)
img_bin = np.copy(prediction_bin)
else:
img_bin = np.copy(img_resized)
img_bin = np.copy(img_resized)
#print("inside 1 ", time.time()-t_in)

###textline_mask_tot_ea = self.run_textline(img_bin)
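The removed branch above turns the binarization model's prediction into a pseudo-RGB binary image before further processing. A standalone sketch of that post-processing step on a dummy array (the model call itself is not reproduced):

```python
# Sketch of the post-processing used above: pixels whose channel-0 value equals 0
# become 255, everything else 0, replicated to three channels; dtype uint16
# follows the original code. The array below is a stand-in for the model output.
import numpy as np

prediction_bin = np.zeros((4, 6, 3), dtype=np.uint8)
prediction_bin[1:3, 2:5, 0] = 1                           # some pixels with class 1

binary = 255 * (prediction_bin[:, :, 0] == 0)             # channel-0 class map -> 0/255
binary = np.repeat(binary[:, :, np.newaxis], 3, axis=2)   # back to a 3-channel image
binary = binary.astype(np.uint16)

print(binary.shape, binary.dtype)  # (4, 6, 3) uint16
```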
@@ -3586,190 +3548,13 @@ class Eynollah:
region_ids = ['region_%04d' % i for i in range(len(co_text_all_org))]
return ordered, region_ids

def return_start_and_end_of_common_text_of_textline_ocr(self,textline_image, ind_tot):
width = np.shape(textline_image)[1]
height = np.shape(textline_image)[0]
common_window = int(0.2*width)

width1 = int ( width/2. - common_window )
width2 = int ( width/2. + common_window )

img_sum = np.sum(textline_image[:,:,0], axis=0)
sum_smoothed = gaussian_filter1d(img_sum, 3)

peaks_real, _ = find_peaks(sum_smoothed, height=0)

if len(peaks_real)>70:
print(len(peaks_real), 'len(peaks_real)')
peaks_real = peaks_real[(peaks_real<width2) & (peaks_real>width1)]

arg_sort = np.argsort(sum_smoothed[peaks_real])
arg_sort4 =arg_sort[::-1][:4]
peaks_sort_4 = peaks_real[arg_sort][::-1][:4]

argsort_sorted = np.argsort(peaks_sort_4)
first_4_sorted = peaks_sort_4[argsort_sorted]
y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
#print(first_4_sorted,'first_4_sorted')

arg_sortnew = np.argsort(y_4_sorted)
peaks_final =np.sort( first_4_sorted[arg_sortnew][2:] )

#plt.figure(ind_tot)
#plt.imshow(textline_image)
#plt.plot([peaks_final[0], peaks_final[0]], [0, height-1])
#plt.plot([peaks_final[1], peaks_final[1]], [0, height-1])
#plt.savefig('./'+str(ind_tot)+'.png')

return peaks_final[0], peaks_final[1]
else:
pass

def return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
self, peaks_real, sum_smoothed, start_split, end_split):

peaks_real = peaks_real[(peaks_real<end_split) & (peaks_real>start_split)]

arg_sort = np.argsort(sum_smoothed[peaks_real])
arg_sort4 =arg_sort[::-1][:4]
peaks_sort_4 = peaks_real[arg_sort][::-1][:4]
argsort_sorted = np.argsort(peaks_sort_4)

first_4_sorted = peaks_sort_4[argsort_sorted]
y_4_sorted = sum_smoothed[peaks_real][arg_sort4[argsort_sorted]]
#print(first_4_sorted,'first_4_sorted')

arg_sortnew = np.argsort(y_4_sorted)
peaks_final =np.sort( first_4_sorted[arg_sortnew][3:] )
return peaks_final[0]

def return_start_and_end_of_common_text_of_textline_ocr_new(self, textline_image, ind_tot):
width = np.shape(textline_image)[1]
height = np.shape(textline_image)[0]
common_window = int(0.15*width)

width1 = int ( width/2. - common_window )
width2 = int ( width/2. + common_window )
mid = int(width/2.)

img_sum = np.sum(textline_image[:,:,0], axis=0)
sum_smoothed = gaussian_filter1d(img_sum, 3)

peaks_real, _ = find_peaks(sum_smoothed, height=0)
if len(peaks_real)>70:
peak_start = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
peaks_real, sum_smoothed, width1, mid+2)
peak_end = self.return_start_and_end_of_common_text_of_textline_ocr_new_splitted(
peaks_real, sum_smoothed, mid-2, width2)

#plt.figure(ind_tot)
#plt.imshow(textline_image)
#plt.plot([peak_start, peak_start], [0, height-1])
#plt.plot([peak_end, peak_end], [0, height-1])
#plt.savefig('./'+str(ind_tot)+'.png')

return peak_start, peak_end
else:
pass

def return_ocr_of_textline_without_common_section(
self,
textline_image,
model_ocr,
processor,
device,
width_textline,
h2w_ratio,
ind_tot,
):

if h2w_ratio > 0.05:
pixel_values = processor(textline_image, return_tensors="pt").pixel_values
generated_ids = model_ocr.generate(pixel_values.to(device))
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
else:
#width = np.shape(textline_image)[1]
#height = np.shape(textline_image)[0]
#common_window = int(0.3*width)
#width1 = int ( width/2. - common_window )
#width2 = int ( width/2. + common_window )

split_point = return_start_and_end_of_common_text_of_textline_ocr_without_common_section(textline_image)
if split_point:
image1 = textline_image[:, :split_point,:]# image.crop((0, 0, width2, height))
image2 = textline_image[:, split_point:,:]#image.crop((width1, 0, width, height))

#pixel_values1 = processor(image1, return_tensors="pt").pixel_values
#pixel_values2 = processor(image2, return_tensors="pt").pixel_values

pixel_values_merged = processor([image1,image2], return_tensors="pt").pixel_values
generated_ids_merged = model_ocr.generate(pixel_values_merged.to(device))
generated_text_merged = processor.batch_decode(generated_ids_merged, skip_special_tokens=True)

#print(generated_text_merged,'generated_text_merged')

#generated_ids1 = model_ocr.generate(pixel_values1.to(device))
#generated_ids2 = model_ocr.generate(pixel_values2.to(device))

#generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
#generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]

#generated_text = generated_text1 + ' ' + generated_text2
generated_text = generated_text_merged[0] + ' ' + generated_text_merged[1]

#print(generated_text1,'generated_text1')
#print(generated_text2, 'generated_text2')
#print('########################################')
else:
pixel_values = processor(textline_image, return_tensors="pt").pixel_values
generated_ids = model_ocr.generate(pixel_values.to(device))
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

#print(generated_text,'generated_text')
#print('########################################')
return generated_text

def return_ocr_of_textline(
self, textline_image, model_ocr, processor, device, width_textline, h2w_ratio,ind_tot):

if h2w_ratio > 0.05:
pixel_values = processor(textline_image, return_tensors="pt").pixel_values
generated_ids = model_ocr.generate(pixel_values.to(device))
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
else:
#width = np.shape(textline_image)[1]
#height = np.shape(textline_image)[0]
#common_window = int(0.3*width)
#width1 = int ( width/2. - common_window )
#width2 = int ( width/2. + common_window )

try:
width1, width2 = self.return_start_and_end_of_common_text_of_textline_ocr_new(textline_image, ind_tot)

image1 = textline_image[:, :width2,:]# image.crop((0, 0, width2, height))
image2 = textline_image[:, width1:,:]#image.crop((width1, 0, width, height))

pixel_values1 = processor(image1, return_tensors="pt").pixel_values
pixel_values2 = processor(image2, return_tensors="pt").pixel_values

generated_ids1 = model_ocr.generate(pixel_values1.to(device))
generated_ids2 = model_ocr.generate(pixel_values2.to(device))

generated_text1 = processor.batch_decode(generated_ids1, skip_special_tokens=True)[0]
generated_text2 = processor.batch_decode(generated_ids2, skip_special_tokens=True)[0]
#print(generated_text1,'generated_text1')
#print(generated_text2, 'generated_text2')
#print('########################################')

match = sq(None, generated_text1, generated_text2).find_longest_match(
0, len(generated_text1), 0, len(generated_text2))
generated_text = generated_text1 + generated_text2[match.b+match.size:]
except:
pixel_values = processor(textline_image, return_tensors="pt").pixel_values
generated_ids = model_ocr.generate(pixel_values.to(device))
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

return generated_text

def return_list_of_contours_with_desired_order(self, ls_cons, sorted_indexes):
return list(np.array(ls_cons)[np.array(sorted_indexes)])
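Among the methods deleted above, `return_ocr_of_textline` stitches the OCR results of two overlapping halves of a long text line by finding their longest common match with `difflib.SequenceMatcher` (imported as `sq` in eynollah). A standalone sketch of just that merge step, with dummy strings in place of real OCR output:

```python
# Standalone sketch of the SequenceMatcher-based merge from the removed
# return_ocr_of_textline: the two halves overlap in the middle, so the second
# string is appended only from the end of the longest common match onward.
from difflib import SequenceMatcher as sq

generated_text1 = "the quick brown fox jumps"
generated_text2 = "fox jumps over the lazy dog"

match = sq(None, generated_text1, generated_text2).find_longest_match(
    0, len(generated_text1), 0, len(generated_text2))
merged = generated_text1 + generated_text2[match.b + match.size:]

print(merged)  # "the quick brown fox jumps over the lazy dog"
```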
@@ -4009,8 +3794,6 @@ class Eynollah:
enabled_modes.append("Light textline detection")
if self.full_layout:
enabled_modes.append("Full layout analysis")
if self.ocr:
enabled_modes.append("OCR")
if self.tables:
enabled_modes.append("Table detection")
if enabled_modes:
@@ -4130,21 +3913,12 @@ class Eynollah:
id_of_texts_tot =['region_0001']
conf_contours_textregions =[0]

if self.ocr and not self.tr:
gc.collect()
ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons, np.zeros((len(all_found_textline_polygons), 4)),
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), textline_light=True)
else:
ocr_all_textlines = None

pcgts = self.writer.build_pagexml_no_full_layout(
cont_page, page_coord, order_text_new, id_of_texts_tot,
all_found_textline_polygons, page_coord, [],
[], [], [], [], [], [],
slopes, [], [],
cont_page, [], [],
ocr_all_textlines=ocr_all_textlines,
conf_contours_textregion=conf_contours_textregions,
skip_layout_reading_order=True)
self.logger.info("Basic processing complete")
@@ -4629,94 +4403,6 @@ class Eynollah:
boxes_d, textline_mask_tot_d)
self.logger.info(f"Detection of reading order took {time.time() - t_order:.1f}s")

ocr_all_textlines = None
ocr_all_textlines_marginals_left = None
ocr_all_textlines_marginals_right = None
ocr_all_textlines_h = None
ocr_all_textlines_drop = None
if self.ocr:
self.logger.info("Step 4.5/5: OCR Processing")

if not self.tr:
gc.collect()

if len(all_found_textline_polygons):
ocr_all_textlines = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons, all_box_coord,
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if len(all_found_textline_polygons_marginals_left):
ocr_all_textlines_marginals_left = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_marginals_left, all_box_coord_marginals_left,
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if len(all_found_textline_polygons_marginals_right):
ocr_all_textlines_marginals_right = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_marginals_right, all_box_coord_marginals_right,
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if self.full_layout and len(all_found_textline_polygons):
ocr_all_textlines_h = return_rnn_cnn_ocr_of_given_textlines(
image_page, all_found_textline_polygons_h, all_box_coord_h,
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

if self.full_layout and len(polygons_of_drop_capitals):
ocr_all_textlines_drop = return_rnn_cnn_ocr_of_given_textlines(
image_page, polygons_of_drop_capitals, np.zeros((len(polygons_of_drop_capitals), 4)),
self.model_zoo.get("ocr"), self.b_s_ocr, self.model_zoo.get("num_to_char"), self.textline_light, self.curved_line)

else:
if self.light_version:
self.logger.info("Using light version OCR")
if self.textline_light:
self.logger.info("Using light text line detection for OCR")
self.logger.info("Processing text lines...")

gc.collect()

torch.cuda.empty_cache()
self.model_zoo.get("ocr").to(self.device)

ind_tot = 0
#cv2.imwrite('./img_out.png', image_page)
ocr_all_textlines = []
# FIXME: what about lines in marginals / headings / drop-capitals here?
for indexing, ind_poly_first in enumerate(all_found_textline_polygons):
ocr_textline_in_textregion = []
for indexing2, ind_poly in enumerate(ind_poly_first):
if not (self.textline_light or self.curved_line):
ind_poly = copy.deepcopy(ind_poly)
box_ind = all_box_coord[indexing]
#print(ind_poly,np.shape(ind_poly), 'ind_poly')
#print(box_ind)
ind_poly = return_textline_contour_with_added_box_coordinate(ind_poly, box_ind)
#print(ind_poly_copy)
ind_poly[ind_poly<0] = 0
x, y, w, h = cv2.boundingRect(ind_poly)
#print(ind_poly_copy, np.shape(ind_poly_copy))
#print(x, y, w, h, h/float(w),'ratio')
h2w_ratio = h/float(w)
mask_poly = np.zeros(image_page.shape)
if not self.light_version:
img_poly_on_img = np.copy(image_page)
else:
img_poly_on_img = np.copy(img_bin_light)
mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))

if self.textline_light:
mask_poly = cv2.dilate(mask_poly, KERNEL, iterations=1)
img_poly_on_img[:,:,0][mask_poly[:,:,0] ==0] = 255
img_poly_on_img[:,:,1][mask_poly[:,:,0] ==0] = 255
img_poly_on_img[:,:,2][mask_poly[:,:,0] ==0] = 255

img_croped = img_poly_on_img[y:y+h, x:x+w, :]
#cv2.imwrite('./extracted_lines/'+str(ind_tot)+'.jpg', img_croped)
text_ocr = self.return_ocr_of_textline_without_common_section(
img_croped, self.model_zoo.get("ocr"), self.model_zoo.get("trocr_processor"), self.device, w, h2w_ratio, ind_tot)
ocr_textline_in_textregion.append(text_ocr)
ind_tot = ind_tot +1
ocr_all_textlines.append(ocr_textline_in_textregion)

self.logger.info("Step 5/5: Output Generation")

if self.full_layout:
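The removed transformer-OCR loop above isolates each text line before recognition by filling its polygon into a mask, whitening everything outside it, and cropping to the bounding box. A reduced sketch of just that masking and cropping step, using a dummy image and polygon instead of the real page data:

```python
# Reduced sketch of the per-textline preparation in the removed loop above:
# fill the line polygon into a mask, white out everything outside it, then
# crop to the polygon's bounding box. Dummy image/polygon for illustration only.
import cv2
import numpy as np

image_page = np.full((200, 300, 3), 128, dtype=np.uint8)                      # stand-in page image
ind_poly = np.array([[30, 40], [250, 45], [248, 90], [28, 85]], dtype=np.int32)  # stand-in line polygon

mask_poly = np.zeros(image_page.shape)
mask_poly = cv2.fillPoly(mask_poly, pts=[ind_poly], color=(1, 1, 1))

img_poly_on_img = np.copy(image_page)
img_poly_on_img[:, :, 0][mask_poly[:, :, 0] == 0] = 255
img_poly_on_img[:, :, 1][mask_poly[:, :, 0] == 0] = 255
img_poly_on_img[:, :, 2][mask_poly[:, :, 0] == 0] = 255

x, y, w, h = cv2.boundingRect(ind_poly)
img_cropped = img_poly_on_img[y:y + h, x:x + w, :]
print(img_cropped.shape)  # crop that would be handed to the OCR model
```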
@@ -4728,9 +4414,7 @@ class Eynollah:
all_found_textline_polygons_marginals_left, all_found_textline_polygons_marginals_right,
all_box_coord_marginals_left, all_box_coord_marginals_right,
slopes, slopes_h, slopes_marginals_left, slopes_marginals_right,
cont_page, polygons_seplines, ocr_all_textlines, ocr_all_textlines_h,
ocr_all_textlines_marginals_left, ocr_all_textlines_marginals_right,
ocr_all_textlines_drop,
cont_page, polygons_seplines,
conf_contours_textregions, conf_contours_textregions_h)
else:
pcgts = self.writer.build_pagexml_no_full_layout(
@@ -4741,9 +4425,6 @@ class Eynollah:
all_box_coord_marginals_left, all_box_coord_marginals_right,
slopes, slopes_marginals_left, slopes_marginals_right,
cont_page, polygons_seplines, contours_tables,
ocr_all_textlines=ocr_all_textlines,
ocr_all_textlines_marginals_left=ocr_all_textlines_marginals_left,
ocr_all_textlines_marginals_right=ocr_all_textlines_marginals_right,
conf_contours_textregions=conf_contours_textregions)

return pcgts
@@ -473,8 +473,7 @@ class Eynollah_ocr:
img = cv2.imread(dir_img)
if dir_in_bin is not None:
cropped_lines_bin = []
dir_img_bin = os.path.join(dir_in_bin, file_name+'.png')
img_bin = cv2.imread(dir_img_bin)
img_bin = cv2.imread(os.path.join(dir_in_bin, file_name+'.png'))

if dir_out_image_text:
out_image_with_text = os.path.join(dir_out_image_text, file_name+'.png')
@@ -13,12 +13,6 @@ from ocrd_models.constants import NAMESPACES as NS
"--textline_light", "--light_version"],
# -ep ...
# -eoi ...
# FIXME: find out whether OCR extra was installed, otherwise skip these
["--do_ocr"],
["--do_ocr", "--light_version", "--textline_light"],
["--do_ocr", "--transformer_ocr"],
#["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light"],
["--do_ocr", "--transformer_ocr", "--light_version", "--textline_light", "--full-layout"],
# --skip_layout_and_reading_order
], ids=str)
def test_run_eynollah_layout_filename(