mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-09 12:19:54 +02:00
move order_and_id_of_texts to utils.xml
This commit is contained in:
parent
8d4ce75656
commit
62fa710f27
3 changed files with 34 additions and 41 deletions
|
@ -63,11 +63,11 @@ from .utils import (
|
||||||
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
putt_bb_of_drop_capitals_of_model_in_patches_in_layout,
|
||||||
check_any_text_region_in_model_one_is_main_or_header,
|
check_any_text_region_in_model_one_is_main_or_header,
|
||||||
small_textlines_to_parent_adherence2,
|
small_textlines_to_parent_adherence2,
|
||||||
order_and_id_of_texts,
|
|
||||||
order_of_regions,
|
order_of_regions,
|
||||||
find_number_of_columns_in_document,
|
find_number_of_columns_in_document,
|
||||||
return_boxes_of_images_by_order_of_reading_new)
|
return_boxes_of_images_by_order_of_reading_new)
|
||||||
from .utils.pil_cv2 import check_dpi
|
from .utils.pil_cv2 import check_dpi
|
||||||
|
from .utils.xml import order_and_id_of_texts
|
||||||
from .plot import EynollahPlotter
|
from .plot import EynollahPlotter
|
||||||
from .writer import EynollahXmlWriter
|
from .writer import EynollahXmlWriter
|
||||||
|
|
||||||
|
@ -1308,7 +1308,7 @@ class Eynollah:
|
||||||
tartib = np.where(indexes_sorted == arg_order_v)[0][0]
|
tartib = np.where(indexes_sorted == arg_order_v)[0][0]
|
||||||
order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = tartib + ref_point
|
order_by_con_main[args_contours_box[indexes_by_type_main[zahler]]] = tartib + ref_point
|
||||||
|
|
||||||
# Copy this box's ids and orders into the page-wide lists, shifting each
# order by ref_point. range() yields plain ints, so the loop must bind a
# single name; unpacking into `jji, _` raises TypeError on the first pass.
for jji in range(len(id_of_texts)):
    order_of_texts_tot.append(order_of_texts[jji] + ref_point)
    id_of_texts_tot.append(id_of_texts[jji])
|
||||||
ref_point = ref_point + len(id_of_texts)
|
ref_point = ref_point + len(id_of_texts)
|
||||||
|
|
|
@ -977,45 +977,6 @@ def small_textlines_to_parent_adherence2(textlines_con, textline_iamge, num_col)
|
||||||
textlines_con_changed.append(textlines_big_org_form)
|
textlines_con_changed.append(textlines_big_org_form)
|
||||||
return textlines_con_changed
|
return textlines_con_changed
|
||||||
|
|
||||||
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
    """Return reading-order values and ``"r<N>"`` ids for two groups of regions.

    Args:
        found_polygons_text_region: Regions paired with the entries whose
            ``kind_of_texts`` value is 1. Only the number of items is used.
        found_polygons_text_region_h: Regions paired with the entries whose
            ``kind_of_texts`` value is 2 (presumably headers, going by the
            ``_h`` suffix -- confirm with the caller). Only the number of
            items is used.
        matrix_of_orders: Not used here; kept so existing callers keep working.
        indexes_sorted: Sequence parallel to ``kind_of_texts``.
        index_of_types: Sequence parallel to ``kind_of_texts``.
        kind_of_texts: Per-entry kind marker; only the values 1 and 2 are
            looked at.
        ref_point: Number used for the first id that is handed out.

    Returns:
        Tuple ``(order_of_texts, id_of_texts)`` of two lists.
    """
    indexes_sorted = np.array(indexes_sorted)
    index_of_types = np.array(index_of_types)
    kind_of_texts = np.array(kind_of_texts)

    id_of_texts = []
    order_of_texts = []

    index_of_types_1 = index_of_types[kind_of_texts == 1]
    indexes_sorted_1 = indexes_sorted[kind_of_texts == 1]

    index_of_types_2 = index_of_types[kind_of_texts == 2]
    indexes_sorted_2 = indexes_sorted[kind_of_texts == 2]

    index_b = ref_point
    for mm, _ in enumerate(found_polygons_text_region):
        id_of_texts.append("r" + str(index_b))
        # Keep the sorted entries whose value equals this region's type index.
        interest = indexes_sorted_1[indexes_sorted_1 == index_of_types_1[mm]]
        if len(interest) > 0:
            order_of_texts.append(interest[0])
            # NOTE: the id number only advances when an order value was
            # found, so a region without a match shares its id with the
            # next region and leaves the two lists with different lengths.
            index_b += 1

    for mm, _ in enumerate(found_polygons_text_region_h):
        id_of_texts.append("r" + str(index_b))
        # Unlike the loop above, the type index is used as a position here.
        order_of_texts.append(indexes_sorted_2[index_of_types_2[mm]])
        index_b += 1

    return order_of_texts, id_of_texts
|
|
||||||
|
|
||||||
def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
def order_of_regions(textline_mask, contours_main, contours_header, y_ref):
|
||||||
|
|
||||||
##plt.imshow(textline_mask)
|
##plt.imshow(textline_mask)
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
|
# pylint: disable=too-many-locals,wrong-import-position,too-many-lines,too-many-statements,chained-comparison,fixme,broad-except,c-extension-no-member
|
||||||
from lxml import etree as ET
|
from lxml import etree as ET
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
NAMESPACES = {}
|
NAMESPACES = {}
|
||||||
NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
|
NAMESPACES['page'] = "http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"
|
||||||
|
@ -60,3 +62,33 @@ def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia, found
|
||||||
indexer_region += 1
|
indexer_region += 1
|
||||||
return id_of_marginalia
|
return id_of_marginalia
|
||||||
|
|
||||||
|
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
    """
    Produce ``"r<N>"`` identifiers and reading-order values for text regions.

    ``indexes_sorted``, ``index_of_types`` and ``kind_of_texts`` are parallel
    sequences. Entries of kind 1 are matched against
    ``found_polygons_text_region`` and entries of kind 2 against
    ``found_polygons_text_region_h``; from both region lists only the item
    count is used. ``matrix_of_orders`` is accepted but never read.
    Identifier numbering starts at ``ref_point``.

    Returns the pair ``(order_of_texts, id_of_texts)``.
    """
    sorted_all = np.array(indexes_sorted)
    types_all = np.array(index_of_types)
    kinds_all = np.array(kind_of_texts)

    kind_1_mask = kinds_all == 1
    kind_2_mask = kinds_all == 2
    kind_1_types, kind_1_sorted = types_all[kind_1_mask], sorted_all[kind_1_mask]
    kind_2_types, kind_2_sorted = types_all[kind_2_mask], sorted_all[kind_2_mask]

    region_ids = []
    region_orders = []
    next_number = 0 + ref_point

    for position, _region in enumerate(found_polygons_text_region):
        region_ids.append(f"r{next_number!s}")
        matches = kind_1_sorted[kind_1_sorted == kind_1_types[position]]
        if len(matches) == 0:
            # No order value found: the id stays in the list and its number
            # is handed out again to the following region.
            continue
        region_orders.append(matches[0])
        next_number += 1

    for position, _region in enumerate(found_polygons_text_region_h):
        region_ids.append(f"r{next_number!s}")
        # Here the type index selects a position rather than a value.
        region_orders.append(kind_2_sorted[kind_2_types[position]])
        next_number += 1

    return region_orders, region_ids
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue