mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-06-07 19:35:01 +02:00
Merge remote-tracking branch 'origin/main' into xml-rfct
This commit is contained in:
commit
02aa31cc66
3 changed files with 20 additions and 10 deletions
|
@ -1666,10 +1666,17 @@ class Eynollah:
|
|||
cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d])
|
||||
cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d)
|
||||
try:
|
||||
cx_bigest_d_last5 = cx_bigest_d[-5:]
|
||||
cy_biggest_d_last5 = cy_biggest_d[-5:]
|
||||
dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))]
|
||||
ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d)
|
||||
if len(cx_bigest_d) >= 5:
|
||||
cx_bigest_d_last5 = cx_bigest_d[-5:]
|
||||
cy_biggest_d_last5 = cy_biggest_d[-5:]
|
||||
dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))]
|
||||
ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d)
|
||||
else:
|
||||
cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):]
|
||||
cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):]
|
||||
dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))]
|
||||
ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d)
|
||||
|
||||
cx_bigest_d_big[0] = cx_bigest_d[ind_largest]
|
||||
cy_biggest_d_big[0] = cy_biggest_d[ind_largest]
|
||||
except Exception as why:
|
||||
|
|
|
@ -2116,13 +2116,17 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
|
|||
x_start_by_order=[]
|
||||
x_end_by_order=[]
|
||||
if len(x_starting)>0:
|
||||
all_columns = np.array(range(len(peaks_neg_tot)-1))
|
||||
columns_covered_by_lines_covered_more_than_2col=[]
|
||||
|
||||
for dj in range(len(x_starting)):
|
||||
columns_covered_by_lines_covered_more_than_2col=columns_covered_by_lines_covered_more_than_2col+list(np.array(range(x_starting[dj],x_ending[dj])) )
|
||||
if set( list(np.array(range(x_starting[dj],x_ending[dj])) ) ) == set(all_columns):
|
||||
pass
|
||||
else:
|
||||
columns_covered_by_lines_covered_more_than_2col=columns_covered_by_lines_covered_more_than_2col+list(np.array(range(x_starting[dj],x_ending[dj])) )
|
||||
columns_covered_by_lines_covered_more_than_2col=list(set(columns_covered_by_lines_covered_more_than_2col))
|
||||
|
||||
all_columns=np.array(range(len(peaks_neg_tot)-1))
|
||||
|
||||
|
||||
columns_not_covered=list( set(all_columns)-set(columns_covered_by_lines_covered_more_than_2col) )
|
||||
|
||||
|
|
|
@ -46,16 +46,15 @@ def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia):
|
|||
region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
|
||||
region_order_sub.set('id', "ro357564684568544579089")
|
||||
region_counter = EynollahIdCounter()
|
||||
indexer_region = 0
|
||||
for idx_textregion, _ in enumerate(order_of_texts):
|
||||
name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
|
||||
name.set('index', str(indexer_region))
|
||||
name.set('index', str(region_counter.get('region')))
|
||||
name.set('regionRef', region_counter.region_id(order_of_texts[idx_textregion]))
|
||||
indexer_region += 1
|
||||
for id_marginal in id_of_marginalia:
|
||||
name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
|
||||
name.set('index', str(indexer_region))
|
||||
name.set('index', str(region_counter.get('region')))
|
||||
name.set('regionRef', id_marginal)
|
||||
region_counter.inc('region')
|
||||
|
||||
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
|
||||
indexes_sorted = np.array(indexes_sorted)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue