mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-05-01 03:32:00 +02:00
move label definition and deskewing cancellation up
This commit is contained in:
parent
fa882e1dbe
commit
52eb4c9a0a
1 changed files with 19 additions and 26 deletions
|
|
@ -1660,7 +1660,7 @@ class Eynollah:
|
||||||
# class | early | old full (and decoded here) | new full (just predicted) | comment
|
# class | early | old full (and decoded here) | new full (just predicted) | comment
|
||||||
# ---
|
# ---
|
||||||
# para | 1 | 1 | 1 |
|
# para | 1 | 1 | 1 |
|
||||||
# head | - | 2 | 2 | used in split_textregion_main_vs_head()
|
# head | - | 2 | 2 | used in split_textregion_main_vs_head() afterwards
|
||||||
# drop | - | 4 | 3 | assigned from full model below
|
# drop | - | 4 | 3 | assigned from full model below
|
||||||
# img | 2 | 5 | 4 | mapped below
|
# img | 2 | 5 | 4 | mapped below
|
||||||
# sep | 3 | 6 | 5 | mapped + assigned from full model below
|
# sep | 3 | 6 | 5 | mapped + assigned from full model below
|
||||||
|
|
@ -2167,6 +2167,16 @@ class Eynollah:
|
||||||
img_pil=None,
|
img_pil=None,
|
||||||
pcgts=None,
|
pcgts=None,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
label_text = 1
|
||||||
|
label_imgs = 2
|
||||||
|
label_imgs_fl = 5
|
||||||
|
label_seps = 3
|
||||||
|
label_seps_fl = 6
|
||||||
|
label_marg = 4
|
||||||
|
label_marg_fl = 8
|
||||||
|
label_drop_fl = 4
|
||||||
|
label_tabs = 10
|
||||||
|
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
self.logger.info(img_filename)
|
self.logger.info(img_filename)
|
||||||
|
|
||||||
|
|
@ -2289,15 +2299,17 @@ class Eynollah:
|
||||||
regions_confidence, textline_confidence,
|
regions_confidence, textline_confidence,
|
||||||
num_col_classifier, num_column_is_classified,
|
num_col_classifier, num_column_is_classified,
|
||||||
erosion_hurts, image)
|
erosion_hurts, image)
|
||||||
#self.logger.info("run graphics %.1fs ", time.time() - t1t)
|
t4 = time.time()
|
||||||
#print("text region early -3 in %.1fs", time.time() - t0)
|
self.logger.info("Cropping took %.1fs", t4 - t3)
|
||||||
textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
|
textline_mask_tot_ea_org = np.copy(textline_mask_tot_ea)
|
||||||
|
|
||||||
#plt.imshow(table_prediction)
|
# if ratio of text regions to page area is smaller than 30%,
|
||||||
#plt.show()
|
# then ignore skew angle above 45°
|
||||||
self.logger.info(f"Layout analysis complete ({time.time() - t1:.1f}s)")
|
if (abs(slope_deskew) > 45 and
|
||||||
|
((text_regions_p == label_text).sum()) <= 0.3 * image_page.size):
|
||||||
|
slope_deskew = 0
|
||||||
|
|
||||||
if not num_col and len(polygons_text_early) == 0:
|
if not num_col and len(polygons_text_early) == 0 or not image_page.size:
|
||||||
self.logger.info("No columns detected - generating empty PAGE-XML")
|
self.logger.info("No columns detected - generating empty PAGE-XML")
|
||||||
|
|
||||||
pcgts = writer.build_pagexml_no_full_layout(
|
pcgts = writer.build_pagexml_no_full_layout(
|
||||||
|
|
@ -2342,25 +2354,6 @@ class Eynollah:
|
||||||
if self.plotter:
|
if self.plotter:
|
||||||
self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name'])
|
self.plotter.save_plot_of_layout_main_all(text_regions_p, image_page, image['name'])
|
||||||
self.plotter.save_plot_of_layout_main(text_regions_p, image_page, image['name'])
|
self.plotter.save_plot_of_layout_main(text_regions_p, image_page, image['name'])
|
||||||
|
|
||||||
label_text = 1
|
|
||||||
label_imgs = 2
|
|
||||||
label_imgs_fl = 5
|
|
||||||
label_seps = 3
|
|
||||||
label_seps_fl = 6
|
|
||||||
label_marg = 4
|
|
||||||
label_marg_fl = 8
|
|
||||||
label_drop_fl = 4
|
|
||||||
label_tabs = 10
|
|
||||||
if image_page.size:
|
|
||||||
# if ratio of text regions to page area is smaller than 30%,
|
|
||||||
# then deskew angle will not be allowed to exceed 45
|
|
||||||
if (abs(slope_deskew) > 45 and
|
|
||||||
((text_regions_p == label_text).sum() +
|
|
||||||
(text_regions_p == label_marg).sum()) <=
|
|
||||||
0.3 * image_page.size):
|
|
||||||
slope_deskew = 0
|
|
||||||
|
|
||||||
t5 = time.time()
|
t5 = time.time()
|
||||||
self.logger.info("Marginalia extraction took %.1fs", t5 - t4)
|
self.logger.info("Marginalia extraction took %.1fs", t5 - t4)
|
||||||
self.logger.info("Step 3/5: Text Line Detection")
|
self.logger.info("Step 3/5: Text Line Detection")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue