refactor to remove data-dependency from all Eynollah methods…

- `cache_images()`: only return an image dict (plus extra keys
  for file name stem and dpi) - don't set any attributes
- `imread()`: just take from passed image dict, also add `binary` key
- `resize_and_enhance_image_with_column_classifier()`:
  * `imread()` from image dict
  * set `img_bin` key for binarization result if `input_binary`
  * instead of `image_page_org_size` / `page_coord` attributes,
    set `img_page` / `coord_page` in image dict
  * instead of retval, set `img_res` in image dict
  * also set `scale_x` and `scale_y` in image dict, resp.
  * simplify
- `resize_image_with_column_classifier()`:
  * `imread()` from image dict
  * (as in `resize_and_enhance_image_with_column_classifier`:)
    call `calculate_width_height_by_columns_1_2` if `num_col` is
    1 or 2 here
  * instead of retval, set `img_res` in image dict
  * also set `scale_x` and `scale_y` in image dict, resp.
  * simplify
- `calculate_width_height_by_columns*()`: simplify, get confidence of
  num_col instead of entire array
- `extract_page()`: read `img_res` from image dict; simplify
- `early_page_for_num_of_column_classification()`:
  `imread()` from image dict; simplify
- `textline_contours()`: no need for `num_col_classifier` here
- `run_textline()`: no need for `num_col_classifier` here
- `get_regions_light_v()` → `get_regions()`:
  * read `img_res` from image dict
  * get shapes via `img` from image dict instead of `image_org` attr
  * use `img_page` / `coord_page` from image dict instead of attrs
  * avoid unnecessary 3-channel arrays
  * simplify
- `get_tables_from_model()`: no need for `num_col_classifier` here
- `run_graphics_and_columns_light()` → `run_graphics_and_columns()`:
  * pass through image dict instead of `img_bin` (which really was `img_res`)
  * simplify
- `run_graphics_and_columns_without_layout()`:
  * pass through image dict instead of `img_bin` (which really was `img_res`)
  * simplify
- `run_enhancement()`: pass through image dict
- `get_image_and_scales*()`: drop
- `run_boxes_full_layout()`:
  * pass `image_page` instead of `img_bin` (which really was `image_page`)
  * simplify
- `run()`:
  * instantiate plotter outside of loop, and independent of img files
  * move writer instantiation and overwrite checks into `run_single()`
  * add try/except for `run_single()` w/ logging
- `reset_file_name_dir()`: drop
- `run_single()`:
  * add some args/kwargs from `run()`
  * call `cache_images()` (reading image dict) here
  * instantiate writer here instead of (reused) attr in `run()`
  * set `scale_x` / `scale_y` in writer from image dict once known
    (i.e. after `run_enhancement()`)
  * don't return anything, but write PAGE result here
- `check_any_text_region_in_model_one_is_main_or_header_light()` →
  `split_textregion_main_vs_header()`
- plotter:
  * pass `name` (file stem) from image dict to all methods
  * for `write_images_into_directory()`: also `scale_x` and `scale_y`
    from image dict
- writer:
  * init with width/height
- ocrd processor:
  * adapt (just `run_single()` call)
  * drop `max_workers=1` restriction (can now run fully parallel)
- `get_textregion_contours_in_org_image_light()` →
  `get_textregion_confidences()`:
  * take shape from confmat directly instead of extra array
  * simplify
This commit is contained in:
Robert Sachunsky 2026-03-13 01:44:39 +01:00
parent 800c55b826
commit becf031c65
7 changed files with 371 additions and 536 deletions

File diff suppressed because it is too large Load diff

View file

@ -26,10 +26,6 @@ class EynollahPlotter:
dir_of_deskewed, dir_of_deskewed,
dir_of_layout, dir_of_layout,
dir_of_cropped_images, dir_of_cropped_images,
image_filename_stem,
image_org=None,
scale_x=1,
scale_y=1,
): ):
self.dir_out = dir_out self.dir_out = dir_out
self.dir_of_all = dir_of_all self.dir_of_all = dir_of_all
@ -37,13 +33,8 @@ class EynollahPlotter:
self.dir_of_layout = dir_of_layout self.dir_of_layout = dir_of_layout
self.dir_of_cropped_images = dir_of_cropped_images self.dir_of_cropped_images = dir_of_cropped_images
self.dir_of_deskewed = dir_of_deskewed self.dir_of_deskewed = dir_of_deskewed
self.image_filename_stem = image_filename_stem
# XXX TODO hacky these cannot be set at init time
self.image_org = image_org
self.scale_x : float = scale_x
self.scale_y : float = scale_y
def save_plot_of_layout_main(self, text_regions_p, image_page): def save_plot_of_layout_main(self, text_regions_p, image_page, name=None):
if self.dir_of_layout is not None: if self.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :]) values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
@ -55,10 +46,10 @@ class EynollahPlotter:
colors = [im.cmap(im.norm(value)) for value in values] colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40) plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout_main.png")) plt.savefig(os.path.join(self.dir_of_layout,
(name or "page") + "_layout_main.png"))
def save_plot_of_layout_main_all(self, text_regions_p, image_page): def save_plot_of_layout_main_all(self, text_regions_p, image_page, name=None):
if self.dir_of_all is not None: if self.dir_of_all is not None:
values = np.unique(text_regions_p[:, :]) values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
@ -73,9 +64,10 @@ class EynollahPlotter:
colors = [im.cmap(im.norm(value)) for value in values] colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_main_and_page.png")) plt.savefig(os.path.join(self.dir_of_all,
(name or "page") + "_layout_main_and_page.png"))
def save_plot_of_layout(self, text_regions_p, image_page): def save_plot_of_layout(self, text_regions_p, image_page, name=None):
if self.dir_of_layout is not None: if self.dir_of_layout is not None:
values = np.unique(text_regions_p[:, :]) values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
@ -87,9 +79,10 @@ class EynollahPlotter:
colors = [im.cmap(im.norm(value)) for value in values] colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40) plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=40)
plt.savefig(os.path.join(self.dir_of_layout, self.image_filename_stem + "_layout.png")) plt.savefig(os.path.join(self.dir_of_layout,
(name or "page") + "_layout.png"))
def save_plot_of_layout_all(self, text_regions_p, image_page): def save_plot_of_layout_all(self, text_regions_p, image_page, name=None):
if self.dir_of_all is not None: if self.dir_of_all is not None:
values = np.unique(text_regions_p[:, :]) values = np.unique(text_regions_p[:, :])
# pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics'] # pixels=['Background' , 'Main text' , 'Heading' , 'Marginalia' ,'Drop capitals' , 'Images' , 'Seperators' , 'Tables', 'Graphics']
@ -104,9 +97,10 @@ class EynollahPlotter:
colors = [im.cmap(im.norm(value)) for value in values] colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_layout_and_page.png")) plt.savefig(os.path.join(self.dir_of_all,
(name or "page") + "_layout_and_page.png"))
def save_plot_of_textlines(self, textline_mask_tot_ea, image_page): def save_plot_of_textlines(self, textline_mask_tot_ea, image_page, name=None):
if self.dir_of_all is not None: if self.dir_of_all is not None:
values = np.unique(textline_mask_tot_ea[:, :]) values = np.unique(textline_mask_tot_ea[:, :])
pixels = ["Background", "Textlines"] pixels = ["Background", "Textlines"]
@ -120,24 +114,31 @@ class EynollahPlotter:
colors = [im.cmap(im.norm(value)) for value in values] colors = [im.cmap(im.norm(value)) for value in values]
patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values] patches = [mpatches.Patch(color=colors[np.where(values == i)[0][0]], label="{l}".format(l=pixels[int(np.where(values_indexes == i)[0][0])])) for i in values]
plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60) plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0, fontsize=60)
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem + "_textline_and_page.png")) plt.savefig(os.path.join(self.dir_of_all,
(name or "page") + "_textline_and_page.png"))
def save_deskewed_image(self, slope_deskew): def save_deskewed_image(self, slope_deskew, image_org, name=None):
if self.dir_of_all is not None: if self.dir_of_all is not None:
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_org.png"), self.image_org) cv2.imwrite(os.path.join(self.dir_of_all,
(name or "page") + "_org.png"), image_org)
if self.dir_of_deskewed is not None: if self.dir_of_deskewed is not None:
img_rotated = rotate_image_different(self.image_org, slope_deskew) img_rotated = rotate_image_different(image_org, slope_deskew)
cv2.imwrite(os.path.join(self.dir_of_deskewed, self.image_filename_stem + "_deskewed.png"), img_rotated) cv2.imwrite(os.path.join(self.dir_of_deskewed,
(name or "page") + "_deskewed.png"), img_rotated)
def save_page_image(self, image_page): def save_page_image(self, image_page, name=None):
if self.dir_of_all is not None: if self.dir_of_all is not None:
cv2.imwrite(os.path.join(self.dir_of_all, self.image_filename_stem + "_page.png"), image_page) cv2.imwrite(os.path.join(self.dir_of_all,
(name or "page") + "_page.png"), image_page)
if self.dir_save_page is not None: if self.dir_save_page is not None:
cv2.imwrite(os.path.join(self.dir_save_page, self.image_filename_stem + "_page.png"), image_page) cv2.imwrite(os.path.join(self.dir_save_page,
def save_enhanced_image(self, img_res): (name or "page") + "_page.png"), image_page)
cv2.imwrite(os.path.join(self.dir_out, self.image_filename_stem + "_enhanced.png"), img_res)
def save_enhanced_image(self, img_res, name=None):
cv2.imwrite(os.path.join(self.dir_out,
(name or "page") + "_enhanced.png"), img_res)
def save_plot_of_textline_density(self, img_patch_org): def save_plot_of_textline_density(self, img_patch_org, name=None):
if self.dir_of_all is not None: if self.dir_of_all is not None:
plt.figure(figsize=(80,40)) plt.figure(figsize=(80,40))
plt.rcParams['font.size']='50' plt.rcParams['font.size']='50'
@ -149,9 +150,10 @@ class EynollahPlotter:
plt.ylabel('Height',fontsize=60) plt.ylabel('Height',fontsize=60)
plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))]) plt.yticks([0,len(gaussian_filter1d(img_patch_org.sum(axis=1), 3))])
plt.gca().invert_yaxis() plt.gca().invert_yaxis()
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_density_of_textline.png')) plt.savefig(os.path.join(self.dir_of_all,
(name or "page") + '_density_of_textline.png'))
def save_plot_of_rotation_angle(self, angels, var_res): def save_plot_of_rotation_angle(self, angels, var_res, name=None):
if self.dir_of_all is not None: if self.dir_of_all is not None:
plt.figure(figsize=(60,30)) plt.figure(figsize=(60,30))
plt.rcParams['font.size']='50' plt.rcParams['font.size']='50'
@ -160,19 +162,20 @@ class EynollahPlotter:
plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50) plt.ylabel('variance of sum of rotated textline in direction of x axis',fontsize=50)
plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$') plt.plot(angels[np.argmax(var_res)],var_res[np.argmax(np.array(var_res))] ,'*',markersize=50,label='Angle of deskewing=' +str("{:.2f}".format(angels[np.argmax(var_res)]))+r'$\degree$')
plt.legend(loc='best') plt.legend(loc='best')
plt.savefig(os.path.join(self.dir_of_all, self.image_filename_stem+'_rotation_angle.png')) plt.savefig(os.path.join(self.dir_of_all,
(name or "page") + '_rotation_angle.png'))
def write_images_into_directory(self, img_contours, image_page): def write_images_into_directory(self, img_contours, image_page, scale_x=1.0, scale_y=1.0, name=None):
if self.dir_of_cropped_images is not None: if self.dir_of_cropped_images is not None:
index = 0 index = 0
for cont_ind in img_contours: for cont_ind in img_contours:
x, y, w, h = cv2.boundingRect(cont_ind) x, y, w, h = cv2.boundingRect(cont_ind)
box = [x, y, w, h] box = [x, y, w, h]
croped_page, page_coord = crop_image_inside_box(box, image_page) image, _ = crop_image_inside_box(box, image_page)
image = resize_image(image,
croped_page = resize_image(croped_page, int(croped_page.shape[0] / self.scale_y), int(croped_page.shape[1] / self.scale_x)) int(image.shape[0] / scale_y),
int(image.shape[1] / scale_x))
path = os.path.join(self.dir_of_cropped_images, self.image_filename_stem + "_" + str(index) + ".jpg") cv2.imwrite(os.path.join(self.dir_of_cropped_images,
cv2.imwrite(path, croped_page) (name or "page") + f"_{index:03d}.jpg"), image)
index += 1 index += 1

View file

@ -8,10 +8,6 @@ from eynollah.model_zoo.model_zoo import EynollahModelZoo
from .eynollah import Eynollah, EynollahXmlWriter from .eynollah import Eynollah, EynollahXmlWriter
class EynollahProcessor(Processor): class EynollahProcessor(Processor):
# already employs background CPU multiprocessing per page
# already employs GPU (without singleton process atm)
max_workers = 1
@cached_property @cached_property
def executable(self) -> str: def executable(self) -> str:
return 'ocrd-eynollah-segment' return 'ocrd-eynollah-segment'
@ -80,14 +76,7 @@ class EynollahProcessor(Processor):
image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file image_filename = "dummy" # will be replaced by ocrd.Processor.process_page_file
result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original result.images.append(OcrdPageResultImage(page_image, '.IMG', page)) # mark as new original
# FIXME: mask out already existing regions (incremental segmentation) # FIXME: mask out already existing regions (incremental segmentation)
self.eynollah.cache_images( self.eynollah.run_single(image_filename, None, img_pil=page_image, pcgts=pcgts,
image_pil=page_image, # ocrd.Processor will handle OCRD_EXISTING_OUTPUT more flexibly
dpi=self.parameter['dpi'], overwrite=True)
)
self.eynollah.writer = EynollahXmlWriter(
dir_out=None,
image_filename=image_filename,
curved_line=self.eynollah.curved_line,
pcgts=pcgts)
self.eynollah.run_single()
return result return result

View file

@ -866,7 +866,7 @@ def check_any_text_region_in_model_one_is_main_or_header(
conf_contours_main, conf_contours_main,
conf_contours_head) conf_contours_head)
def check_any_text_region_in_model_one_is_main_or_header_light( def split_textregion_main_vs_header(
regions_model_1, regions_model_full, regions_model_1, regions_model_full,
contours_only_text_parent, contours_only_text_parent,
all_box_coord, all_found_textline_polygons, all_box_coord, all_found_textline_polygons,

View file

@ -170,7 +170,7 @@ def get_textregion_contours_in_org_image(cnts, img, slope_first):
return cnts_org return cnts_org
def get_textregion_contours_in_org_image_light_old(cnts, img, slope_first): def get_textregion_confidences_old(cnts, img, slope_first):
zoom = 3 zoom = 3
img = cv2.resize(img, (img.shape[1] // zoom, img = cv2.resize(img, (img.shape[1] // zoom,
img.shape[0] // zoom), img.shape[0] // zoom),
@ -208,16 +208,17 @@ def do_back_rotation_and_get_cnt_back(contour_par, index_r_con, img, slope_first
cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0]) cont_int[0][:, 0, 1] = cont_int[0][:, 0, 1] + np.abs(img_copy.shape[0] - img.shape[0])
return cont_int[0], index_r_con, confidence_contour return cont_int[0], index_r_con, confidence_contour
def get_textregion_contours_in_org_image_light(cnts, img, confidence_matrix): def get_textregion_confidences(cnts, confidence_matrix):
if not len(cnts): if not len(cnts):
return [] return []
height, width = confidence_matrix.shape
confidence_matrix = cv2.resize(confidence_matrix, confidence_matrix = cv2.resize(confidence_matrix,
(img.shape[1] // 6, img.shape[0] // 6), (width // 6, height // 6),
interpolation=cv2.INTER_NEAREST) interpolation=cv2.INTER_NEAREST)
confs = [] confs = []
for cnt in cnts: for cnt in cnts:
cnt_mask = np.zeros(confidence_matrix.shape) cnt_mask = np.zeros_like(confidence_matrix)
cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0) cnt_mask = cv2.fillPoly(cnt_mask, pts=[cnt // 6], color=1.0)
confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask)) confs.append(np.sum(confidence_matrix * cnt_mask) / np.sum(cnt_mask))
return confs return confs

View file

@ -1507,9 +1507,9 @@ def do_image_rotation(angle, img=None, sigma_des=1.0, logger=None):
return var return var
def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100, def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
main_page=False, logger=None, plotter=None, map=None): main_page=False, logger=None, plotter=None, name=None, map=None):
if main_page and plotter: if main_page and plotter:
plotter.save_plot_of_textline_density(img_patch_org) plotter.save_plot_of_textline_density(img_patch_org, name)
img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1])) img_int=np.zeros((img_patch_org.shape[0],img_patch_org.shape[1]))
img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0] img_int[:,:]=img_patch_org[:,:]#img_patch_org[:,:,0]
@ -1527,16 +1527,16 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]: if main_page and img_patch_org.shape[1] > img_patch_org.shape[0]:
angles = np.array([-45, 0, 45, 90,]) angles = np.array([-45, 0, 45, 90,])
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, name=name, plotter=plotter)
angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles) angles = np.linspace(angle - 22.5, angle + 22.5, n_tot_angles)
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, name=name, plotter=plotter)
elif main_page: elif main_page:
#angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45]) #angles = np.linspace(-12, 12, n_tot_angles)#np.array([0 , 45 , 90 , -45])
angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4), angles = np.concatenate((np.linspace(-12, -7, n_tot_angles // 4),
np.linspace(-6, 6, n_tot_angles // 2), np.linspace(-6, 6, n_tot_angles // 2),
np.linspace(7, 12, n_tot_angles // 4))) np.linspace(7, 12, n_tot_angles // 4)))
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, name=name, plotter=plotter)
early_slope_edge=11 early_slope_edge=11
if abs(angle) > early_slope_edge: if abs(angle) > early_slope_edge:
@ -1544,12 +1544,12 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
angles2 = np.linspace(-90, -12, n_tot_angles) angles2 = np.linspace(-90, -12, n_tot_angles)
else: else:
angles2 = np.linspace(90, 12, n_tot_angles) angles2 = np.linspace(90, 12, n_tot_angles)
angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter) angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, name=name, plotter=plotter)
if var2 > var: if var2 > var:
angle = angle2 angle = angle2
else: else:
angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10) angles = np.linspace(-25, 25, int(0.5 * n_tot_angles) + 10)
angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, var = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, name=name, plotter=plotter)
early_slope_edge=22 early_slope_edge=22
if abs(angle) > early_slope_edge: if abs(angle) > early_slope_edge:
@ -1557,15 +1557,15 @@ def return_deskew_slop(img_patch_org, sigma_des,n_tot_angles=100,
angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10) angles2 = np.linspace(-90, -25, int(0.5 * n_tot_angles) + 10)
else: else:
angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10) angles2 = np.linspace(90, 25, int(0.5 * n_tot_angles) + 10)
angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, plotter=plotter) angle2, var2 = get_smallest_skew(img_resized, sigma_des, angles2, map=map, logger=logger, name=name, plotter=plotter)
if var2 > var: if var2 > var:
angle = angle2 angle = angle2
# precision stage: # precision stage:
angles = np.linspace(angle - 2.5, angle + 2.5, n_tot_angles // 2) angles = np.linspace(angle - 2.5, angle + 2.5, n_tot_angles // 2)
angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, plotter=plotter) angle, _ = get_smallest_skew(img_resized, sigma_des, angles, map=map, logger=logger, name=name, plotter=plotter)
return angle return angle
def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map): def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, name=None, map=map):
if logger is None: if logger is None:
logger = getLogger(__package__) logger = getLogger(__package__)
if map is None: if map is None:
@ -1576,7 +1576,7 @@ def get_smallest_skew(img, sigma_des, angles, logger=None, plotter=None, map=map
results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None), results = list(map(partial(do_image_rotation, img=img_shared, sigma_des=sigma_des, logger=None),
angles)) angles))
if plotter: if plotter:
plotter.save_plot_of_rotation_angle(angles, results) plotter.save_plot_of_rotation_angle(angles, results, name)
try: try:
var_res = np.array(results) var_res = np.array(results)
assert var_res.any() assert var_res.any()
@ -1595,7 +1595,7 @@ def do_work_of_slopes_new_curved(
box_text, contour_par, box_text, contour_par,
textline_mask_tot_ea=None, mask_texts_only=None, textline_mask_tot_ea=None, mask_texts_only=None,
num_col=1, scale_par=1.0, slope_deskew=0.0, num_col=1, scale_par=1.0, slope_deskew=0.0,
logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None logger=None, MAX_SLOPE=999, KERNEL=None, plotter=None, name=None
): ):
if KERNEL is None: if KERNEL is None:
KERNEL = np.ones((5, 5), np.uint8) KERNEL = np.ones((5, 5), np.uint8)
@ -1626,7 +1626,7 @@ def do_work_of_slopes_new_curved(
else: else:
sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0))) sigma_des = max(1, int(y_diff_mean * (4.0 / 40.0)))
img_int_p[img_int_p > 0] = 1 img_int_p[img_int_p > 0] = 1
slope_for_all = return_deskew_slop(img_int_p, sigma_des, logger=logger, plotter=plotter) slope_for_all = return_deskew_slop(img_int_p, sigma_des, logger=logger, name=name, plotter=plotter)
if abs(slope_for_all) < 0.5: if abs(slope_for_all) < 0.5:
slope_for_all = slope_deskew slope_for_all = slope_deskew
except: except:

View file

@ -26,7 +26,7 @@ from .utils.contour import contour2polygon, make_valid
class EynollahXmlWriter: class EynollahXmlWriter:
def __init__(self, *, dir_out, image_filename, curved_line, pcgts=None): def __init__(self, *, dir_out, image_filename, image_width, image_height, curved_line, pcgts=None):
self.logger = logging.getLogger('eynollah.writer') self.logger = logging.getLogger('eynollah.writer')
self.counter = EynollahIdCounter() self.counter = EynollahIdCounter()
self.dir_out = dir_out self.dir_out = dir_out
@ -34,10 +34,10 @@ class EynollahXmlWriter:
self.output_filename = os.path.join(self.dir_out or "", self.image_filename_stem) + ".xml" self.output_filename = os.path.join(self.dir_out or "", self.image_filename_stem) + ".xml"
self.curved_line = curved_line self.curved_line = curved_line
self.pcgts = pcgts self.pcgts = pcgts
self.scale_x: Optional[float] = None # XXX set outside __init__ self.image_height = image_height
self.scale_y: Optional[float] = None # XXX set outside __init__ self.image_width = image_width
self.height_org: Optional[int] = None # XXX set outside __init__ self.scale_x = 1.0
self.width_org: Optional[int] = None # XXX set outside __init__ self.scale_y = 1.0
@property @property
def image_filename_stem(self): def image_filename_stem(self):
@ -49,7 +49,7 @@ class EynollahXmlWriter:
if offset is not None: if offset is not None:
poly = affinity.translate(poly, *offset) poly = affinity.translate(poly, *offset)
poly = affinity.scale(poly, xfact=1 / self.scale_x, yfact=1 / self.scale_y, origin=(0, 0)) poly = affinity.scale(poly, xfact=1 / self.scale_x, yfact=1 / self.scale_y, origin=(0, 0))
poly = make_valid(clip_by_rect(poly, 0, 0, self.width_org, self.height_org)) poly = make_valid(clip_by_rect(poly, 0, 0, self.image_width, self.image_height))
return points_from_polygon(poly.exterior.coords[:-1]) return points_from_polygon(poly.exterior.coords[:-1])
def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion): def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
@ -161,7 +161,8 @@ class EynollahXmlWriter:
self.logger.debug('enter build_pagexml') self.logger.debug('enter build_pagexml')
# create the file structure # create the file structure
pcgts = self.pcgts if self.pcgts else create_page_xml(self.image_filename, self.height_org, self.width_org) pcgts = self.pcgts if self.pcgts else create_page_xml(
self.image_filename, self.image_height, self.image_width)
page = pcgts.get_Page() page = pcgts.get_Page()
if len(cont_page): if len(cont_page):
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_points(cont_page[0])))) page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_points(cont_page[0]))))