bold run_single refactoring (predict segmentation on cropped img)…

- move `extract_page()` to the start (right after enhancement),
  so early layout and textline model predictions see the cropped
  image
- `extract_page()`: also return page mask
- `get_early_layout()`:
  * use cropped image
  * also run optional table prediction here,
    and map table label and confidence right away
    (so no need to pass these arrays everywhere)
  * suppress all non-text type regions in textline mask
  * also return text+table mask
    (so no need to reconstruct it everywhere)
- apply page mask to textline mask and early layout result
  (i.e. suppress areas beyond border contour)
- `run_graphics_and_columns()`:
  * rename → `run_columns()`
  * no table prediction here
  * no page extraction here
  * no page cropping+masking here
  * no textline mask suppression here
- `run_graphics_and_columns_without_layout()`: drop
  (not needed anymore)
- `run_marginals()` vs. `get_marginals()`: extract
  `text_mask` internally from early layout
- early page cropping for col-classifier:
  also use cropped image in input binarization mode
- early page cropping for col-classifier:
  get external contours instead of indiscriminate tree
- writer: skip-layout mode now also uses cropped coordinates
  (so drop the kwarg for it)
This commit is contained in:
Robert Sachunsky 2026-04-29 01:52:29 +02:00
parent 7e7cc6a801
commit 33c055389d
3 changed files with 116 additions and 184 deletions

View file

@ -314,10 +314,10 @@ class Eynollah:
img = self.imread(image, binary=self.input_binary) img = self.imread(image, binary=self.input_binary)
width_early = img.shape[1] width_early = img.shape[1]
_, page_coord = self.early_page_for_num_of_column_classification(image) page_img, page_coord = self.early_page_for_num_of_column_classification(img)
if self.input_binary: if self.input_binary:
img_in = img img_in = page_img
else: else:
img_1ch = self.imread(image, grayscale=True, uint8=False) img_1ch = self.imread(image, grayscale=True, uint8=False)
img_1ch = img_1ch[page_coord[0]: page_coord[1], img_1ch = img_1ch[page_coord[0]: page_coord[1],
@ -365,7 +365,7 @@ class Eynollah:
width_early = img.shape[1] width_early = img.shape[1]
t1 = time.time() t1 = time.time()
_, page_coord = self.early_page_for_num_of_column_classification(image) page_img, page_coord = self.early_page_for_num_of_column_classification(img)
label_p_pred = np.ones(6) label_p_pred = np.ones(6)
conf_col = 1.0 conf_col = 1.0
@ -376,7 +376,7 @@ class Eynollah:
elif (not self.num_col_upper and not self.num_col_lower or elif (not self.num_col_upper and not self.num_col_lower or
self.num_col_upper != self.num_col_lower): self.num_col_upper != self.num_col_lower):
if self.input_binary: if self.input_binary:
img_in = img img_in = page_img
else: else:
img_1ch = self.imread(image, grayscale=True) img_1ch = self.imread(image, grayscale=True)
img_1ch = img_1ch[page_coord[0]: page_coord[1], img_1ch = img_1ch[page_coord[0]: page_coord[1],
@ -856,6 +856,7 @@ class Eynollah:
[[w, 0]], [[w, 0]],
[[w, h]], [[w, h]],
[[0, h]]])] [[0, h]]])]
mask_page = np.ones((h, w), dtype=np.uint8)
if not self.ignore_page_extraction: if not self.ignore_page_extraction:
self.logger.debug("enter extract_page") self.logger.debug("enter extract_page")
#cv2.GaussianBlur(img, (5, 5), 0) #cv2.GaussianBlur(img, (5, 5), 0)
@ -878,21 +879,22 @@ class Eynollah:
#h = h + (self.image.shape[0] - (y + h)) #h = h + (self.image.shape[0] - (y + h))
box = [x, y, w, h] box = [x, y, w, h]
cropped_page, page_coord = crop_image_inside_box(box, img) cropped_page, page_coord = crop_image_inside_box(box, img)
self.logger.debug("exit extract_page") mask_page = np.zeros((h, w), dtype=np.uint8)
return cropped_page, page_coord, cont_page mask_page = cv2.fillPoly(mask_page, pts=[cnt - [x, y]], color=1)
def early_page_for_num_of_column_classification(self, image): self.logger.debug("exit extract_page")
img = self.imread(image, binary=self.input_binary) return page_coord, cont_page, cropped_page, mask_page
def early_page_for_num_of_column_classification(self, img):
if not self.ignore_page_extraction: if not self.ignore_page_extraction:
self.logger.debug("enter early_page_for_num_of_column_classification") self.logger.debug("enter early_page_for_num_of_column_classification")
img = cv2.GaussianBlur(img, (5, 5), 0) img2 = cv2.GaussianBlur(img, (5, 5), 0)
prediction = self.do_prediction(False, img, self.model_zoo.get("page")) prediction = self.do_prediction(False, img2, self.model_zoo.get("page"))
prediction = cv2.dilate(prediction, KERNEL, iterations=3) prediction = cv2.dilate(prediction, KERNEL, iterations=3)
contours, _ = cv2.findContours(prediction, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) contours, _ = cv2.findContours(prediction, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
if len(contours)>0: if len(contours):
cnt_size = np.array([cv2.contourArea(contours[j]) areas = np.array(list(map(cv2.contourArea, contours)))
for j in range(len(contours))]) cnt = contours[np.argmax(areas)]
cnt = contours[np.argmax(cnt_size)]
box = cv2.boundingRect(cnt) box = cv2.boundingRect(cnt)
else: else:
box = [0, 0, img.shape[1], img.shape[0]] box = [0, 0, img.shape[1], img.shape[0]]
@ -1060,21 +1062,18 @@ class Eynollah:
return result, conf_textline return result, conf_textline
def get_early_layout( def get_early_layout(
self, image, self, img,
num_col_classifier, num_col_classifier,
label_text=1, label_text=1,
label_imgs=2, label_imgs=2,
label_seps=3, label_seps=3,
label_tabs=10,
): ):
self.logger.debug("enter get_early_layout") self.logger.debug("enter get_early_layout")
t_in = time.time() t_in = time.time()
erosion_hurts = False erosion_hurts = False
img = image['img_res'] # already cropped
img_height_h = img.shape[0] img_height_h, img_width_h = img.shape[:2]
img_width_h = img.shape[1]
img_org = image['img']
img_height_org = img_org.shape[0]
img_width_org = img_org.shape[1]
if num_col_classifier == 1: if num_col_classifier == 1:
img_w_new = 1000 img_w_new = 1000
@ -1088,7 +1087,7 @@ class Eynollah:
img_w_new = 3000 img_w_new = 3000
else: else:
img_w_new = 4000 img_w_new = 4000
img_h_new = img_w_new * img.shape[0] // img.shape[1] img_h_new = img_w_new * img_height_h // img_width_h
img_resized = resize_image(img, img_h_new, img_w_new) img_resized = resize_image(img, img_h_new, img_w_new)
self.logger.debug("detecting textlines on %s with %d colors", self.logger.debug("detecting textlines on %s with %d colors",
str(img_resized.shape), len(np.unique(img_resized))) str(img_resized.shape), len(np.unique(img_resized)))
@ -1096,12 +1095,10 @@ class Eynollah:
textline_mask_tot_ea, confidence_textline = self.run_textline(img_resized) textline_mask_tot_ea, confidence_textline = self.run_textline(img_resized)
textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h) textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h)
confidence_textline = resize_image(confidence_textline, img_height_h, img_width_h) confidence_textline = resize_image(confidence_textline, img_height_h, img_width_h)
if self.plotter:
self.plotter.save_plot_of_textlines(textline_mask_tot_ea, img_resized, image['name'])
if self.skip_layout_and_reading_order: if self.skip_layout_and_reading_order:
self.logger.debug("exit get_early_layout") self.logger.debug("exit get_early_layout")
return None, erosion_hurts, None, None, textline_mask_tot_ea, None, None return erosion_hurts, None, None, None, None, textline_mask_tot_ea, None, None
#print("inside 2 ", time.time()-t_in) #print("inside 2 ", time.time()-t_in)
if num_col_classifier == 1 or num_col_classifier == 2: if num_col_classifier == 1 or num_col_classifier == 2:
@ -1110,15 +1107,13 @@ class Eynollah:
else: else:
patches = False patches = False
self.logger.debug("resized to %dx%d for %d cols", self.logger.debug("resized to %dx%d for %d cols",
img_resized.shape[1], img_resized.shape[0], img_w_new, img_h_new, num_col_classifier)
num_col_classifier)
else: else:
new_w = (900+ (num_col_classifier-3)*100) new_w = (900+ (num_col_classifier-3)*100)
new_h = new_w * img.shape[0] // img.shape[1] new_h = new_w * img_height_h // img_width_h
img_resized = resize_image(img_resized, new_h, new_w) img_resized = resize_image(img_resized, new_h, new_w)
self.logger.debug("resized to %dx%d (new_w=%d) for %d cols", self.logger.debug("resized to %dx%d for %d cols",
img_resized.shape[1], img_resized.shape[0], new_w, new_h, num_col_classifier)
new_w, num_col_classifier)
patches = True patches = True
prediction_regions, confidence_regions = \ prediction_regions, confidence_regions = \
@ -1132,9 +1127,16 @@ class Eynollah:
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
confidence_regions = resize_image(confidence_regions, img_height_h, img_width_h) confidence_regions = resize_image(confidence_regions, img_height_h, img_width_h)
if self.tables:
prediction_tables, confidence_tables = self.get_tables_from_model(img)
else:
prediction_tables = np.zeros(img.shape[:2], dtype=np.uint8)
confidence_tables = np.zeros(img.shape[:2], dtype=bool)
mask_texts_only = (prediction_regions == label_text).astype('uint8') mask_texts_only = (prediction_regions == label_text).astype('uint8')
mask_images_only = (prediction_regions == label_imgs).astype('uint8') mask_images_only = (prediction_regions == label_imgs).astype('uint8')
mask_seps_only = (prediction_regions == label_seps).astype('uint8') mask_seps_only = (prediction_regions == label_seps).astype('uint8')
mask_tabs_only = prediction_tables
##if num_col_classifier == 1 or num_col_classifier == 2: ##if num_col_classifier == 1 or num_col_classifier == 2:
###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1) ###mask_texts_only = cv2.erode(mask_texts_only, KERNEL, iterations=1)
@ -1148,21 +1150,30 @@ class Eynollah:
polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001) polygons_of_only_texts = return_contours_of_interested_region(mask_texts_only,1,0.00001)
##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts) ##polygons_of_only_texts = dilate_textregion_contours(polygons_of_only_texts)
polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001) polygons_of_only_seps = return_contours_of_interested_region(mask_seps_only,1,0.00001)
polygons_of_only_tabs = return_contours_of_interested_region(mask_tabs_only,1,0.00001)
text_regions_p = np.zeros_like(prediction_regions) text_regions_p = np.zeros_like(prediction_regions)
text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_seps, color=label_seps) text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_seps, color=label_seps)
text_regions_p[mask_images_only == 1] = label_imgs text_regions_p[mask_images_only == 1] = label_imgs
text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_texts, color=label_text) text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_texts, color=label_text)
text_regions_p = cv2.fillPoly(text_regions_p, pts=polygons_of_only_tabs, color=label_tabs)
textline_mask_tot_ea[text_regions_p == 0] = 0 textline_mask_tot_ea[text_regions_p != label_text] = 0
confidence_textline[text_regions_p != label_text] = 0
confidence_regions[text_regions_p == label_tabs] = \
confidence_tables[text_regions_p == label_tabs]
regions_without_separators = ((text_regions_p == label_text) |
(text_regions_p == label_tabs)).astype(np.uint8)
#plt.imshow(textline_mask_tot_ea) #plt.imshow(textline_mask_tot_ea)
#plt.show() #plt.show()
#print("inside 4 ", time.time()-t_in) #print("inside 4 ", time.time()-t_in)
self.logger.debug("exit get_early_layout") self.logger.debug("exit get_early_layout")
return (text_regions_p, return (erosion_hurts,
erosion_hurts,
polygons_seplines, polygons_seplines,
polygons_of_only_texts, polygons_of_only_texts,
regions_without_separators,
text_regions_p,
textline_mask_tot_ea, textline_mask_tot_ea,
confidence_regions, confidence_regions,
confidence_textline) confidence_textline)
@ -1494,65 +1505,22 @@ class Eynollah:
table_prediction = table_prediction.astype(np.uint8) table_prediction = table_prediction.astype(np.uint8)
return table_prediction, table_confidence return table_prediction, table_confidence
def run_graphics_and_columns( def run_columns(
self, text_regions_p_1, textline_mask_tot_ea, self, text_regions_p_1,
regions_confidence, textline_confidence,
num_col_classifier, num_column_is_classified, num_col_classifier, num_column_is_classified,
erosion_hurts, image, erosion_hurts,
label_imgs=2, label_imgs=2,
label_seps=3, label_seps=3,
): ):
"""detect page boundary and apply its mask/bbox, post-process column classifier result, optionally detect tables""" """post-process column classifier result"""
t_in_gr = time.time() t_in_gr = time.time()
regions_without_separators = ((text_regions_p_1 != label_seps) &
(text_regions_p_1 != 0)).astype(np.uint8)
if not erosion_hurts:
regions_without_separators = cv2.erode(regions_without_separators, KERNEL, iterations=6)
image_page, page_coord, cont_page = self.extract_page(image)
if self.tables:
table_prediction, table_confidence = self.get_tables_from_model(image_page)
else:
table_prediction = np.zeros(image_page.shape[:2], dtype=np.uint8)
table_confidence = np.zeros(image_page.shape[:2], dtype=bool)
if self.plotter:
self.plotter.save_page_image(image_page, image['name'])
if not self.ignore_page_extraction:
mask_page = np.zeros_like(text_regions_p_1)
mask_page = cv2.fillPoly(mask_page, pts=[cont_page[0]], color=1)
mask_page = mask_page == 0
text_regions_p_1[mask_page] = 0
textline_mask_tot_ea[mask_page] = 0
regions_confidence[mask_page] = 0
textline_confidence[mask_page] = 0
box = slice(*page_coord[0:2]), slice(*page_coord[2:4])
text_regions_p_1 = text_regions_p_1[box]
textline_mask_tot_ea = textline_mask_tot_ea[box]
regions_confidence = regions_confidence[box]
textline_confidence = textline_confidence[box]
self.logger.debug("Cropped page is %dx%d", *text_regions_p_1.shape)
mask_images = (text_regions_p_1 == label_imgs).astype(np.uint8)
mask_images = cv2.erode(mask_images, KERNEL, iterations=10)
textline_mask_tot_ea[mask_images == 1] = 0
textline_confidence[mask_images == 1] = 0
img_only_regions_with_sep = ((text_regions_p_1 != label_seps) &
(text_regions_p_1 != 0)).astype(np.uint8)
#print("inside graphics 2 ", time.time() - t_in_gr)
if erosion_hurts:
img_only_regions = img_only_regions_with_sep
else:
img_only_regions = cv2.erode(img_only_regions_with_sep, KERNEL, iterations=6)
##print(img_only_regions.shape,'img_only_regions')
##plt.imshow(img_only_regions[:,:])
##plt.show()
##num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0)
try: try:
num_col, _ = find_num_col(img_only_regions, num_col_classifier, self.tables, multiplier=6.0) num_col, _ = find_num_col(regions_without_separators, num_col_classifier, self.tables, multiplier=6.0)
num_col = num_col + 1 num_col = num_col + 1
if not num_column_is_classified: if not num_column_is_classified:
num_col_classifier = num_col num_col_classifier = num_col
@ -1562,29 +1530,7 @@ class Eynollah:
except Exception as why: except Exception as why:
self.logger.exception(why) self.logger.exception(why)
num_col = None num_col = None
return (num_col, num_col_classifier, return num_col, num_col_classifier
page_coord, image_page, cont_page,
text_regions_p_1,
table_prediction,
textline_mask_tot_ea,
regions_confidence,
table_confidence,
textline_confidence,
)
def run_graphics_and_columns_without_layout(self, textline_mask_tot_ea, image):
image_page, page_coord, cont_page = self.extract_page(image)
mask_page = np.zeros_like(textline_mask_tot_ea)
mask_page = cv2.fillPoly(mask_page, pts=[cont_page[0]], color=1)
mask_page = mask_page == 0
textline_mask_tot_ea[mask_page] = 0
box = slice(*page_coord[0:2]), slice(*page_coord[2:4])
textline_mask_tot_ea = textline_mask_tot_ea[box]
return page_coord, image_page, textline_mask_tot_ea, cont_page
def run_enhancement(self, image): def run_enhancement(self, image):
t_in = time.time() t_in = time.time()
@ -1624,21 +1570,14 @@ class Eynollah:
self.logger.info("slope_deskew: %.2f°", slope_deskew) self.logger.info("slope_deskew: %.2f°", slope_deskew)
return slope_deskew return slope_deskew
def run_marginals( def run_marginals(self, num_col_classifier, slope_deskew, text_regions_p):
self, num_col_classifier, slope_deskew, text_regions_p, table_prediction): get_marginals(num_col_classifier, slope_deskew, text_regions_p,
kernel=KERNEL)
regions_without_separators = (text_regions_p == 1).astype(np.uint8)
if self.tables:
regions_without_separators[table_prediction == 1] = 1
get_marginals(regions_without_separators, text_regions_p,
num_col_classifier, slope_deskew, kernel=KERNEL)
def get_full_layout( def get_full_layout(
self, image_page, self, image_page,
text_regions_p, text_regions_p,
num_col_classifier, num_col_classifier,
table_prediction,
label_text=1, label_text=1,
label_imgs=2, label_imgs=2,
label_imgs_fl=5, label_imgs_fl=5,
@ -1669,10 +1608,6 @@ class Eynollah:
text_regions_p[text_regions_p == label_seps] = label_seps_fl text_regions_p[text_regions_p == label_seps] = label_seps_fl
text_regions_p[text_regions_p == label_marg] = label_marg_fl text_regions_p[text_regions_p == label_marg] = label_marg_fl
regions_without_separators = (text_regions_p == label_text).astype(np.uint8)
# regions_without_separators = ( text_regions_p == 1 | text_regions_p == 2 ) * 1
image_page = image_page.astype(np.uint8)
if self.full_layout: if self.full_layout:
regions_fully, regionsfl_confidence = self.extract_text_regions_new( regions_fully, regionsfl_confidence = self.extract_text_regions_new(
image_page, image_page,
@ -1689,20 +1624,13 @@ class Eynollah:
regions_fully[drops] = label_drop_fl_model regions_fully[drops] = label_drop_fl_model
drops = fill_bb_of_drop_capitals(regions_fully, text_regions_p) drops = fill_bb_of_drop_capitals(regions_fully, text_regions_p)
text_regions_p[drops] = label_drop_fl text_regions_p[drops] = label_drop_fl
regions_without_separators[drops] = 1 # also cover in reading-order
else: else:
regions_fully = None, regions_fully = None,
regionsfl_confidence = None regionsfl_confidence = None
if self.tables:
text_regions_p[table_prediction == 1] = label_tabs
regions_without_separators[table_prediction == 1] = 1
# no need to return text_regions_p (inplace editing) # no need to return text_regions_p (inplace editing)
self.logger.debug('exit get_full_layout') self.logger.debug('exit get_full_layout')
return (regions_fully, regionsfl_confidence, return regions_fully, regionsfl_confidence
regions_without_separators)
def get_deskewed_masks( def get_deskewed_masks(
self, self,
@ -2200,22 +2128,26 @@ class Eynollah:
f"{image['dpi']} DPI, {num_col_classifier} columns") f"{image['dpi']} DPI, {num_col_classifier} columns")
self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)") self.logger.info(f"Enhancement complete ({time.time() - t0:.1f}s)")
t1 = time.time()
page_coord, cont_page, image_page, mask_page = self.extract_page(image)
if not self.ignore_page_extraction:
self.logger.debug("Cropped page is %dx%d", image_page.shape[1], image_page.shape[0])
self.logger.info("Cropping took %.1fs", time.time() - t1)
if self.plotter:
self.plotter.save_page_image(image_page, image['name'])
# Basic Processing Mode # Basic Processing Mode
if self.skip_layout_and_reading_order: if self.skip_layout_and_reading_order:
self.logger.info("Step 2/5: Basic Processing Mode") self.logger.info("Step 2/5: Basic Processing Mode")
self.logger.info("Skipping layout analysis and reading order detection") self.logger.info("Skipping layout analysis and reading order detection")
_ ,_, _, _, textline_mask_tot_ea, _, _ = \ _, _, _, _, _, textline_mask_tot_ea, _, _ = \
self.get_early_layout(image, num_col_classifier) self.get_early_layout(image_page, num_col_classifier)
page_coord, image_page, textline_mask_tot_ea, cont_page = \ textline_mask_tot_ea *= mask_page
self.run_graphics_and_columns_without_layout(textline_mask_tot_ea, image) textline_cnt, textline_hir = return_contours_of_image(textline_mask_tot_ea)
##all_found_textline_polygons =self.scale_contours_new(textline_mask_tot_ea)
cnt_clean_rot_raw, hir_on_cnt_clean_rot = return_contours_of_image(textline_mask_tot_ea)
all_found_textline_polygons = filter_contours_area_of_image( all_found_textline_polygons = filter_contours_area_of_image(
textline_mask_tot_ea, cnt_clean_rot_raw, hir_on_cnt_clean_rot, max_area=1, min_area=0.00001) textline_mask_tot_ea, textline_cnt, textline_hir, max_area=1, min_area=0.00001)
cx_textlines, cy_textlines = find_center_of_contours(all_found_textline_polygons) cx_textlines, cy_textlines = find_center_of_contours(all_found_textline_polygons)
w_h_textlines = [cv2.boundingRect(polygon)[2:] w_h_textlines = [cv2.boundingRect(polygon)[2:]
@ -2247,7 +2179,6 @@ class Eynollah:
cont_page=cont_page, cont_page=cont_page,
polygons_seplines=[], polygons_seplines=[],
conf_textregions=[0], conf_textregions=[0],
skip_layout_reading_order=True
) )
self.logger.info("Basic processing complete") self.logger.info("Basic processing complete")
writer.write_pagexml(pcgts) writer.write_pagexml(pcgts)
@ -2257,15 +2188,21 @@ class Eynollah:
t1 = time.time() t1 = time.time()
self.logger.info("Step 2/5: Layout Analysis") self.logger.info("Step 2/5: Layout Analysis")
(text_regions_p, (erosion_hurts,
erosion_hurts,
polygons_seplines, polygons_seplines,
polygons_text_early, polygons_text_early,
regions_without_separators,
text_regions_p,
textline_mask_tot_ea, textline_mask_tot_ea,
regions_confidence, regions_confidence,
textline_confidence) = self.get_early_layout(image, num_col_classifier) textline_confidence) = self.get_early_layout(image_page, num_col_classifier)
regions_without_separators *= mask_page
text_regions_p *= mask_page
textline_mask_tot_ea *= mask_page
t2 = time.time() t2 = time.time()
self.logger.info("Eearly layout took %.1fs", t2 - t1) self.logger.info("Early layout took %.1fs", t2 - t1)
if self.plotter:
self.plotter.save_plot_of_textlines(textline_mask_tot_ea, image_page, image['name'])
if num_col_classifier == 1 or num_col_classifier ==2: if num_col_classifier == 1 or num_col_classifier ==2:
if num_col_classifier == 1: if num_col_classifier == 1:
@ -2278,28 +2215,22 @@ class Eynollah:
slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew) slope_deskew = self.run_deskew(textline_mask_tot_ea_deskew)
else: else:
slope_deskew = self.run_deskew(textline_mask_tot_ea) slope_deskew = self.run_deskew(textline_mask_tot_ea)
if self.plotter:
self.plotter.save_deskewed_image(slope_deskew, image['img'], image['name'])
t3 = time.time()
self.logger.info("Deskewing took %.1fs", t3 - t2)
(num_col, num_col_classifier,
page_coord, image_page, cont_page,
text_regions_p, table_prediction, textline_mask_tot_ea,
regions_confidence, table_confidence, textline_confidence) = \
self.run_graphics_and_columns(text_regions_p, textline_mask_tot_ea,
regions_confidence, textline_confidence,
num_col_classifier, num_column_is_classified,
erosion_hurts, image)
t4 = time.time()
self.logger.info("Cropping took %.1fs", t4 - t3)
textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
# if ratio of text regions to page area is smaller that 30%, # if ratio of text regions to page area is smaller that 30%,
# then ignore skew angle above 45° # then ignore skew angle above 45°
if (abs(slope_deskew) > 45 and if (abs(slope_deskew) > 45 and
((text_regions_p == label_text).sum()) <= 0.3 * image_page.size): ((text_regions_p == label_text).sum()) <= 0.3 * image_page.size):
slope_deskew = 0 slope_deskew = 0
if self.plotter:
self.plotter.save_deskewed_image(slope_deskew, image['img'], image['name'])
t3 = time.time()
self.logger.info("Deskewing took %.1fs", t3 - t2)
num_col, num_col_classifier = \
self.run_columns(text_regions_p,
num_col_classifier, num_column_is_classified,
erosion_hurts)
t4 = time.time()
textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
if not num_col and len(polygons_text_early) == 0 or not image_page.size: if not num_col and len(polygons_text_early) == 0 or not image_page.size:
self.logger.info("No columns detected - generating empty PAGE-XML") self.logger.info("No columns detected - generating empty PAGE-XML")
@ -2336,32 +2267,34 @@ class Eynollah:
img_h_new = img_w_new * img_h_org // img_w_org img_h_new = img_w_new * img_h_org // img_w_org
text_regions_p_new = resize_image(text_regions_p, img_h_new, img_w_new) text_regions_p_new = resize_image(text_regions_p, img_h_new, img_w_new)
table_prediction_new = resize_image(table_prediction, img_h_new, img_w_new) self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_new)
self.run_marginals(num_col_classifier, slope_deskew, text_regions_p_new, table_prediction_new)
text_regions_p = resize_image(text_regions_p_new, img_h_org, img_w_org) text_regions_p = resize_image(text_regions_p_new, img_h_org, img_w_org)
t5 = time.time()
self.logger.info("Marginalia extraction took %.1fs", t5 - t4)
else:
t5 = time.time()
if self.plotter: if self.plotter:
self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name']) self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name'])
self.plotter.save_plot_of_layout_main(text_regions_p, image_page, image['name']) self.plotter.save_plot_of_layout_main(text_regions_p, image_page, image['name'])
t5 = time.time()
self.logger.info("Marginalia extraction took %.1fs", t5 - t4)
self.logger.info("Step 3/5: Text Line Detection")
regions_fully, regionsfl_confidence, regions_without_separators = \ regions_fully, regionsfl_confidence = \
self.get_full_layout(image_page, self.get_full_layout(image_page, text_regions_p, num_col_classifier)
text_regions_p,
num_col_classifier,
table_prediction)
if self.full_layout: if self.full_layout:
regions_without_separators[text_regions_p == label_drop_fl] = 1 # also cover in reading-order
textline_mask_tot_ea_org[text_regions_p == label_drop_fl] = 0 # skip for textlines textline_mask_tot_ea_org[text_regions_p == label_drop_fl] = 0 # skip for textlines
textline_mask_tot_ea[text_regions_p == label_drop_fl] = 1 # needed for reading order textline_mask_tot_ea[text_regions_p == label_drop_fl] = 1 # needed for reading order
polygons_of_drop_capitals = return_contours_of_interested_region(text_regions_p, polygons_of_drop_capitals = return_contours_of_interested_region(text_regions_p,
label_drop_fl, label_drop_fl,
min_area=0.00003) min_area=0.00003)
conf_drops = get_region_confidences(polygons_of_drop_capitals, regionsfl_confidence) conf_drops = get_region_confidences(polygons_of_drop_capitals, regionsfl_confidence)
t6 = time.time() t6 = time.time()
self.logger.info("Full layout took %.1fs", t6 - t5) self.logger.info("Full layout took %.1fs", t6 - t5)
else:
t6 = time.time()
self.logger.info("Step 3/5: Contour extraction")
min_area_mar = 0.00001 min_area_mar = 0.00001
marginal_mask = (text_regions_p == label_marg_fl).astype(np.uint8) marginal_mask = (text_regions_p == label_marg_fl).astype(np.uint8)
@ -2373,7 +2306,7 @@ class Eynollah:
polygons_of_images = return_contours_of_interested_region(text_regions_p, label_imgs_fl) polygons_of_images = return_contours_of_interested_region(text_regions_p, label_imgs_fl)
conf_marginals = get_region_confidences(polygons_of_marginals, regions_confidence) conf_marginals = get_region_confidences(polygons_of_marginals, regions_confidence)
conf_images = get_region_confidences(polygons_of_images, regions_confidence) conf_images = get_region_confidences(polygons_of_images, regions_confidence)
conf_tables = get_region_confidences(polygons_of_tables, table_confidence) conf_tables = get_region_confidences(polygons_of_tables, regions_confidence)
polygons_of_textregions = return_contours_of_interested_region(text_regions_p, label_text, polygons_of_textregions = return_contours_of_interested_region(text_regions_p, label_text,
min_area=MIN_AREA_REGION) min_area=MIN_AREA_REGION)
@ -2513,7 +2446,7 @@ class Eynollah:
image['scale_x'], image['scale_y'], image['name']) image['scale_x'], image['scale_y'], image['name'])
t_order = time.time() t_order = time.time()
self.logger.info("Step 4/5: Reading Order Detection") self.logger.info("Step 4/5: Reading Order")
if self.right2left: if self.right2left:
self.logger.info("Right-to-left mode enabled") self.logger.info("Right-to-left mode enabled")
if self.headers_off: if self.headers_off:

View file

@ -6,15 +6,18 @@ from .contour import find_center_of_contours, return_contours_of_interested_regi
from .resize import resize_image from .resize import resize_image
from .rotate import rotate_image from .rotate import rotate_image
def get_marginals(text_mask, early_layout, num_col, slope_deskew, def get_marginals(num_col, slope_deskew, early_layout,
kernel=None, kernel=None,
label_text=1, label_text=1,
label_marg=4, label_marg=4,
label_tabs=10,
): ):
if kernel is None: if kernel is None:
kernel = np.ones((5, 5), dtype=np.uint8) kernel = np.ones((5, 5), dtype=np.uint8)
kernel_hor = np.ones((1, 5), dtype=np.uint8) kernel_hor = np.ones((1, 5), dtype=np.uint8)
text_mask = ((early_layout == label_text) |
(early_layout == label_tabs)).astype(np.uint8)
text_mask_d = rotate_image(text_mask, slope_deskew) text_mask_d = rotate_image(text_mask, slope_deskew)
main_mask_d = np.zeros_like(text_mask_d) main_mask_d = np.zeros_like(text_mask_d)
height, width = main_mask_d.shape height, width = main_mask_d.shape

View file

@ -166,7 +166,6 @@ class EynollahXmlWriter:
conf_images=None, conf_images=None,
conf_tables=None, conf_tables=None,
conf_drops=None, conf_drops=None,
skip_layout_reading_order=False,
): ):
self.logger.debug('enter build_pagexml') self.logger.debug('enter build_pagexml')
@ -180,10 +179,7 @@ class EynollahXmlWriter:
if len(cont_page): if len(cont_page):
page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_points(cont_page[0])))) page.set_Border(BorderType(Coords=CoordsType(points=self.calculate_points(cont_page[0]))))
if skip_layout_reading_order: offset = [page_coord[2], page_coord[0]]
offset = None
else:
offset = [page_coord[2], page_coord[0]]
counter = EynollahIdCounter() counter = EynollahIdCounter()
if len(order_of_texts): if len(order_of_texts):
_counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts)) _counter_marginals = EynollahIdCounter(region_idx=len(order_of_texts))
@ -283,7 +279,7 @@ class EynollahXmlWriter:
for region_contour in polygons_seplines: for region_contour in polygons_seplines:
page.add_SeparatorRegion( page.add_SeparatorRegion(
SeparatorRegionType(id=counter.next_region_id, SeparatorRegionType(id=counter.next_region_id,
Coords=CoordsType(points=self.calculate_points(region_contour, None)))) Coords=CoordsType(points=self.calculate_points(region_contour, offset))))
for mm, region_contour in enumerate(found_polygons_tables): for mm, region_contour in enumerate(found_polygons_tables):
table = TableRegionType( table = TableRegionType(