mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-04-30 19:22:03 +02:00
writer, do_work_of_slopes*: drop passing bboxes around
(no longer needed)
This commit is contained in:
parent
b792324c5b
commit
2da718f76f
4 changed files with 21 additions and 72 deletions
|
|
@ -1006,7 +1006,6 @@ class Eynollah:
|
|||
args_textlines = np.arange(len(polygons_of_textlines))
|
||||
all_found_textline_polygons = []
|
||||
slopes = []
|
||||
all_box_coord =[]
|
||||
|
||||
for index, con_region_ind in enumerate(contours_par):
|
||||
results = [cv2.pointPolygonTest(con_region_ind, (cx_main_tot[ind], cy_main_tot[ind]), False)
|
||||
|
|
@ -1026,17 +1025,12 @@ class Eynollah:
|
|||
all_found_textline_polygons.append(textlines_ins)#[::-1])
|
||||
slopes.append(slope_deskew)
|
||||
|
||||
crop_coor = box2rect(boxes[index])
|
||||
all_box_coord.append(crop_coor)
|
||||
|
||||
return (all_found_textline_polygons,
|
||||
all_box_coord,
|
||||
slopes)
|
||||
return all_found_textline_polygons, slopes
|
||||
|
||||
def get_slopes_and_deskew_new_curved(self, contours_par, textline_mask_tot, boxes,
|
||||
num_col, slope_deskew, name):
|
||||
if not len(contours_par):
|
||||
return [], [], []
|
||||
return [], []
|
||||
self.logger.debug("enter get_slopes_and_deskew_new_curved")
|
||||
results = map(partial(do_work_of_slopes_new_curved,
|
||||
textline_mask_tot_ea=textline_mask_tot,
|
||||
|
|
@ -1049,7 +1043,7 @@ class Eynollah:
|
|||
name=name),
|
||||
boxes, contours_par)
|
||||
results = list(results) # exhaust prior to release
|
||||
#textline_polygons, box_coord, slopes = zip(*results)
|
||||
#textline_polygons, slopes = zip(*results)
|
||||
self.logger.debug("exit get_slopes_and_deskew_new_curved")
|
||||
return tuple(zip(*results))
|
||||
|
||||
|
|
@ -2064,7 +2058,7 @@ class Eynollah:
|
|||
)
|
||||
|
||||
def separate_marginals_to_left_and_right_and_order_from_top_to_down(
|
||||
self, polygons_of_marginals, all_found_textline_polygons_marginals, all_box_coord_marginals,
|
||||
self, polygons_of_marginals, all_found_textline_polygons_marginals,
|
||||
slopes_marginals, conf_marginals, mid_point_of_page_width):
|
||||
cx_marg, cy_marg = find_center_of_contours(polygons_of_marginals)
|
||||
cx_marg = ensure_array(cx_marg)
|
||||
|
|
@ -2086,7 +2080,6 @@ class Eynollah:
|
|||
|
||||
return (*splitsort(polygons_of_marginals),
|
||||
*splitsort(all_found_textline_polygons_marginals),
|
||||
*splitsort(all_box_coord_marginals),
|
||||
*splitsort(slopes_marginals),
|
||||
*splitsort(conf_marginals))
|
||||
|
||||
|
|
@ -2264,15 +2257,12 @@ class Eynollah:
|
|||
page_coord=page_coord,
|
||||
order_of_texts=order_text_new,
|
||||
all_found_textline_polygons=all_found_textline_polygons,
|
||||
all_box_coord=page_coord,
|
||||
found_polygons_images=[],
|
||||
found_polygons_tables=[],
|
||||
found_polygons_marginals_left=[],
|
||||
found_polygons_marginals_right=[],
|
||||
all_found_textline_polygons_marginals_left=[],
|
||||
all_found_textline_polygons_marginals_right=[],
|
||||
all_box_coord_marginals_left=[],
|
||||
all_box_coord_marginals_right=[],
|
||||
slopes=slopes,
|
||||
slopes_marginals_left=[],
|
||||
slopes_marginals_right=[],
|
||||
|
|
@ -2337,15 +2327,12 @@ class Eynollah:
|
|||
page_coord=page_coord,
|
||||
order_of_texts=[],
|
||||
all_found_textline_polygons=[],
|
||||
all_box_coord=[],
|
||||
found_polygons_images=[],
|
||||
found_polygons_tables=[],
|
||||
found_polygons_marginals_left=[],
|
||||
found_polygons_marginals_right=[],
|
||||
all_found_textline_polygons_marginals_left=[],
|
||||
all_found_textline_polygons_marginals_right=[],
|
||||
all_box_coord_marginals_left=[],
|
||||
all_box_coord_marginals_right=[],
|
||||
slopes=[],
|
||||
slopes_marginals_left=[],
|
||||
slopes_marginals_right=[],
|
||||
|
|
@ -2486,12 +2473,12 @@ class Eynollah:
|
|||
#print("text region early 5 in %.1fs", time.time() - t0)
|
||||
## birdan sora chock chakir
|
||||
if not self.curved_line:
|
||||
all_found_textline_polygons, \
|
||||
all_box_coord, slopes = self.get_slopes_and_deskew_new_light2(
|
||||
all_found_textline_polygons, slopes = \
|
||||
self.get_slopes_and_deskew_new_light2(
|
||||
polygons_of_textregions, textline_mask_tot_ea_org,
|
||||
boxes_text, slope_deskew)
|
||||
all_found_textline_polygons_marginals, \
|
||||
all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_light2(
|
||||
all_found_textline_polygons_marginals, slopes_marginals = \
|
||||
self.get_slopes_and_deskew_new_light2(
|
||||
polygons_of_marginals, textline_mask_tot_ea_org,
|
||||
boxes_marginals, slope_deskew)
|
||||
|
||||
|
|
@ -2503,15 +2490,15 @@ class Eynollah:
|
|||
all_found_textline_polygons_marginals)
|
||||
else:
|
||||
textline_mask_tot_ea_erode = cv2.erode(textline_mask_tot_ea, kernel=KERNEL, iterations=2)
|
||||
all_found_textline_polygons, \
|
||||
all_box_coord, slopes = self.get_slopes_and_deskew_new_curved(
|
||||
all_found_textline_polygons, slopes = \
|
||||
self.get_slopes_and_deskew_new_curved(
|
||||
polygons_of_textregions, textline_mask_tot_ea_erode,
|
||||
boxes_text,
|
||||
num_col_classifier, slope_deskew, image['name'])
|
||||
all_found_textline_polygons = small_textlines_to_parent_adherence2(
|
||||
all_found_textline_polygons, textline_mask_tot_ea, num_col_classifier)
|
||||
all_found_textline_polygons_marginals, \
|
||||
all_box_coord_marginals, slopes_marginals = self.get_slopes_and_deskew_new_curved(
|
||||
all_found_textline_polygons_marginals, slopes_marginals = \
|
||||
self.get_slopes_and_deskew_new_curved(
|
||||
polygons_of_marginals, textline_mask_tot_ea_erode,
|
||||
boxes_marginals,
|
||||
num_col_classifier, slope_deskew, image['name'])
|
||||
|
|
@ -2533,8 +2520,6 @@ class Eynollah:
|
|||
polygons_of_marginals_right,
|
||||
all_found_textline_polygons_marginals_left,
|
||||
all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left,
|
||||
all_box_coord_marginals_right,
|
||||
slopes_marginals_left,
|
||||
slopes_marginals_right,
|
||||
conf_marginals_left,
|
||||
|
|
@ -2542,7 +2527,6 @@ class Eynollah:
|
|||
self.separate_marginals_to_left_and_right_and_order_from_top_to_down(
|
||||
polygons_of_marginals,
|
||||
all_found_textline_polygons_marginals,
|
||||
all_box_coord_marginals,
|
||||
slopes_marginals,
|
||||
conf_marginals,
|
||||
0.5 * text_regions_p.shape[1])
|
||||
|
|
@ -2556,8 +2540,6 @@ class Eynollah:
|
|||
polygons_of_textregions_h,
|
||||
polygons_of_textregions_d,
|
||||
polygons_of_textregions_h_d,
|
||||
all_box_coord,
|
||||
all_box_coord_h,
|
||||
all_found_textline_polygons,
|
||||
all_found_textline_polygons_h,
|
||||
slopes,
|
||||
|
|
@ -2568,7 +2550,6 @@ class Eynollah:
|
|||
regions_fully,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_d,
|
||||
all_box_coord,
|
||||
all_found_textline_polygons,
|
||||
slopes,
|
||||
conf_textregions)
|
||||
|
|
@ -2576,10 +2557,6 @@ class Eynollah:
|
|||
if self.plotter:
|
||||
self.plotter.save_plot_of_layout(text_regions_p, image_page, image['name'])
|
||||
self.plotter.save_plot_of_layout_all(text_regions_p, image_page, image['name'])
|
||||
##all_found_textline_polygons = adhere_drop_capital_region_into_corresponding_textline(
|
||||
##text_regions_p, polygons_of_drop_capitals, polygons_of_textregions, polygons_of_textregions_h,
|
||||
##all_box_coord, all_box_coord_h, all_found_textline_polygons, all_found_textline_polygons_h,
|
||||
##kernel=KERNEL, curved_line=self.curved_line)
|
||||
else:
|
||||
polygons_of_drop_capitals = []
|
||||
polygons_of_textregions_h = []
|
||||
|
|
@ -2640,8 +2617,6 @@ class Eynollah:
|
|||
order_of_texts=order_text_new,
|
||||
all_found_textline_polygons=all_found_textline_polygons,
|
||||
all_found_textline_polygons_h=all_found_textline_polygons_h,
|
||||
all_box_coord=all_box_coord,
|
||||
all_box_coord_h=all_box_coord_h,
|
||||
found_polygons_images=polygons_of_images,
|
||||
found_polygons_tables=polygons_of_tables,
|
||||
found_polygons_drop_capitals=polygons_of_drop_capitals,
|
||||
|
|
@ -2649,8 +2624,6 @@ class Eynollah:
|
|||
found_polygons_marginals_right=polygons_of_marginals_right,
|
||||
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
|
||||
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left=all_box_coord_marginals_left,
|
||||
all_box_coord_marginals_right=all_box_coord_marginals_right,
|
||||
slopes=slopes,
|
||||
slopes_h=slopes_h,
|
||||
slopes_marginals_left=slopes_marginals_left,
|
||||
|
|
@ -2671,15 +2644,12 @@ class Eynollah:
|
|||
page_coord=page_coord,
|
||||
order_of_texts=order_text_new,
|
||||
all_found_textline_polygons=all_found_textline_polygons,
|
||||
all_box_coord=all_box_coord,
|
||||
found_polygons_images=polygons_of_images,
|
||||
found_polygons_tables=polygons_of_tables,
|
||||
found_polygons_marginals_left=polygons_of_marginals_left,
|
||||
found_polygons_marginals_right=polygons_of_marginals_right,
|
||||
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
|
||||
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left=all_box_coord_marginals_left,
|
||||
all_box_coord_marginals_right=all_box_coord_marginals_right,
|
||||
slopes=slopes,
|
||||
slopes_marginals_left=slopes_marginals_left,
|
||||
slopes_marginals_right=slopes_marginals_right,
|
||||
|
|
|
|||
|
|
@ -887,7 +887,6 @@ def split_textregion_main_vs_head(
|
|||
regions_model_full,
|
||||
polygons_of_textregions,
|
||||
polygons_of_textregions_d,
|
||||
all_box_coord,
|
||||
all_found_textline_polygons,
|
||||
slopes,
|
||||
conf_textregions,
|
||||
|
|
@ -960,8 +959,6 @@ def split_textregion_main_vs_head(
|
|||
select(polygons_of_textregions, head),
|
||||
select(polygons_of_textregions_d, main),
|
||||
select(polygons_of_textregions_d, head),
|
||||
select(all_box_coord, main),
|
||||
select(all_box_coord, head),
|
||||
select(all_found_textline_polygons, main),
|
||||
select(all_found_textline_polygons, head),
|
||||
select(slopes, main),
|
||||
|
|
|
|||
|
|
@ -1645,7 +1645,6 @@ def do_work_of_slopes_new_curved(
|
|||
|
||||
# print(slope, slope_deskew)
|
||||
|
||||
crop_coor = box2rect(box_text)
|
||||
if abs(slope) < 45:
|
||||
mask_parent = np.zeros((h, w), dtype=np.uint8)
|
||||
mask_parent = cv2.fillPoly(mask_parent, pts=[contour_par - [x, y]], color=1)
|
||||
|
|
@ -1676,7 +1675,7 @@ def do_work_of_slopes_new_curved(
|
|||
slope, contour_par,
|
||||
box_text)
|
||||
|
||||
return textlines_cnt_per_region[::-1], crop_coor, slope
|
||||
return textlines_cnt_per_region[::-1], slope
|
||||
|
||||
def do_work_of_slopes_new_light(
|
||||
box_text, contour, contour_par,
|
||||
|
|
@ -1688,7 +1687,6 @@ def do_work_of_slopes_new_light(
|
|||
logger.debug('enter do_work_of_slopes_new_light')
|
||||
|
||||
x, y, w, h = box_text
|
||||
crop_coor = box2rect(box_text)
|
||||
mask_textline = np.zeros(textline_mask_tot_ea.shape)
|
||||
mask_textline = cv2.fillPoly(mask_textline, pts=[contour], color=(1,1,1))
|
||||
all_text_region_raw = textline_mask_tot_ea * mask_textline
|
||||
|
|
@ -1703,4 +1701,4 @@ def do_work_of_slopes_new_light(
|
|||
cnt_clean_rot = filter_contours_area_of_image(all_text_region_raw, cnt_clean_rot_raw, hir_on_cnt_clean_rot,
|
||||
max_area=1, min_area=0.00001)
|
||||
|
||||
return cnt_clean_rot, crop_coor, slope_deskew
|
||||
return cnt_clean_rot, slope_deskew
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class EynollahXmlWriter:
|
|||
poly = make_valid(clip_by_rect(poly, 0, 0, self.image_width, self.image_height))
|
||||
return points_from_polygon(poly.exterior.coords[:-1])
|
||||
|
||||
def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, all_box_coord, slopes, counter, ocr_all_textlines_textregion):
|
||||
def serialize_lines_in_region(self, text_region, all_found_textline_polygons, region_idx, page_coord, slopes, counter, ocr_all_textlines_textregion):
|
||||
for j, polygon_textline in enumerate(all_found_textline_polygons[region_idx]):
|
||||
coords = CoordsType()
|
||||
textline = TextLineType(id=counter.next_line_id, Coords=coords)
|
||||
|
|
@ -60,12 +60,7 @@ class EynollahXmlWriter:
|
|||
textline.set_TextEquiv([TextEquivType(Unicode=ocr_all_textlines_textregion[j])])
|
||||
text_region.add_TextLine(textline)
|
||||
text_region.set_orientation(-slopes[region_idx])
|
||||
region_bboxes = all_box_coord[region_idx]
|
||||
offset = [page_coord[2], page_coord[0]]
|
||||
# FIXME: or actually... self.curved_line or np.abs(slopes[region_idx]) > 45?
|
||||
if self.curved_line and np.abs(slopes[region_idx]) > 45:
|
||||
offset[0] += region_bboxes[2]
|
||||
offset[1] += region_bboxes[0]
|
||||
coords.set_points(self.calculate_points(polygon_textline, offset))
|
||||
|
||||
def write_pagexml(self, pcgts):
|
||||
|
|
@ -80,15 +75,12 @@ class EynollahXmlWriter:
|
|||
page_coord,
|
||||
order_of_texts,
|
||||
all_found_textline_polygons,
|
||||
all_box_coord,
|
||||
found_polygons_images,
|
||||
found_polygons_tables,
|
||||
found_polygons_marginals_left,
|
||||
found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left,
|
||||
all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left,
|
||||
all_box_coord_marginals_right,
|
||||
slopes,
|
||||
slopes_marginals_left,
|
||||
slopes_marginals_right,
|
||||
|
|
@ -111,8 +103,6 @@ class EynollahXmlWriter:
|
|||
order_of_texts=order_of_texts,
|
||||
all_found_textline_polygons=all_found_textline_polygons,
|
||||
all_found_textline_polygons_h=[],
|
||||
all_box_coord=all_box_coord,
|
||||
all_box_coord_h=[],
|
||||
found_polygons_images=found_polygons_images,
|
||||
found_polygons_tables=found_polygons_tables,
|
||||
found_polygons_drop_capitals=[],
|
||||
|
|
@ -120,8 +110,6 @@ class EynollahXmlWriter:
|
|||
found_polygons_marginals_right=found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left=all_found_textline_polygons_marginals_left,
|
||||
all_found_textline_polygons_marginals_right=all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left=all_box_coord_marginals_left,
|
||||
all_box_coord_marginals_right=all_box_coord_marginals_right,
|
||||
slopes=slopes,
|
||||
slopes_h=[],
|
||||
slopes_marginals_left=slopes_marginals_left,
|
||||
|
|
@ -147,8 +135,6 @@ class EynollahXmlWriter:
|
|||
order_of_texts,
|
||||
all_found_textline_polygons,
|
||||
all_found_textline_polygons_h,
|
||||
all_box_coord,
|
||||
all_box_coord_h,
|
||||
found_polygons_images,
|
||||
found_polygons_tables,
|
||||
found_polygons_drop_capitals,
|
||||
|
|
@ -156,8 +142,6 @@ class EynollahXmlWriter:
|
|||
found_polygons_marginals_right,
|
||||
all_found_textline_polygons_marginals_left,
|
||||
all_found_textline_polygons_marginals_right,
|
||||
all_box_coord_marginals_left,
|
||||
all_box_coord_marginals_right,
|
||||
slopes,
|
||||
slopes_h,
|
||||
slopes_marginals_left,
|
||||
|
|
@ -214,7 +198,7 @@ class EynollahXmlWriter:
|
|||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons, mm, page_coord,
|
||||
all_box_coord, slopes, counter, ocr_textlines)
|
||||
slopes, counter, ocr_textlines)
|
||||
|
||||
self.logger.debug('len(found_polygons_text_region_h) %s', len(found_polygons_text_region_h))
|
||||
for mm, region_contour in enumerate(found_polygons_text_region_h):
|
||||
|
|
@ -231,7 +215,7 @@ class EynollahXmlWriter:
|
|||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(textregion, all_found_textline_polygons_h, mm, page_coord,
|
||||
all_box_coord_h, slopes_h, counter, ocr_textlines)
|
||||
slopes_h, counter, ocr_textlines)
|
||||
|
||||
for mm, region_contour in enumerate(found_polygons_drop_capitals):
|
||||
dropcapital = TextRegionType(
|
||||
|
|
@ -241,14 +225,13 @@ class EynollahXmlWriter:
|
|||
if conf_drops:
|
||||
dropcapital.Coords.set_conf(conf_drops[mm])
|
||||
page.add_TextRegion(dropcapital)
|
||||
all_box_coord_drop = [[0, 0, 0, 0]]
|
||||
slopes_drop = [0]
|
||||
if ocr_all_textlines_drop:
|
||||
ocr_textlines = ocr_all_textlines_drop[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(dropcapital, [[found_polygons_drop_capitals[mm]]], 0, page_coord,
|
||||
all_box_coord_drop, slopes_drop, counter, ocr_textlines)
|
||||
slopes_drop, counter, ocr_textlines)
|
||||
|
||||
for mm, region_contour in enumerate(found_polygons_marginals_left):
|
||||
marginal = TextRegionType(
|
||||
|
|
@ -262,7 +245,8 @@ class EynollahXmlWriter:
|
|||
ocr_textlines = ocr_all_textlines_marginals_left[mm]
|
||||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord, all_box_coord_marginals_left, slopes_marginals_left, counter, ocr_textlines)
|
||||
self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_left, mm, page_coord,
|
||||
slopes_marginals_left, counter, ocr_textlines)
|
||||
|
||||
for mm, region_contour in enumerate(found_polygons_marginals_right):
|
||||
marginal = TextRegionType(
|
||||
|
|
@ -277,7 +261,7 @@ class EynollahXmlWriter:
|
|||
else:
|
||||
ocr_textlines = None
|
||||
self.serialize_lines_in_region(marginal, all_found_textline_polygons_marginals_right, mm, page_coord,
|
||||
all_box_coord_marginals_right, slopes_marginals_right, counter, ocr_textlines)
|
||||
slopes_marginals_right, counter, ocr_textlines)
|
||||
|
||||
for mm, region_contour in enumerate(found_polygons_images):
|
||||
image = ImageRegionType(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue