Merge remote-tracking branch 'origin/main' into xml-rfct

pull/28/head
Konstantin Baierer 4 years ago
commit 02aa31cc66

@@ -1666,10 +1666,17 @@ class Eynollah:
cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d]) cx_bigest_d_big, cy_biggest_d_big, _, _, _, _, _ = find_new_features_of_contours([contours_biggest_d])
cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d) cx_bigest_d, cy_biggest_d, _, _, _, _, _ = find_new_features_of_contours(contours_only_text_parent_d)
try: try:
if len(cx_bigest_d) >= 5:
cx_bigest_d_last5 = cx_bigest_d[-5:] cx_bigest_d_last5 = cx_bigest_d[-5:]
cy_biggest_d_last5 = cy_biggest_d[-5:] cy_biggest_d_last5 = cy_biggest_d[-5:]
dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))] dists_d = [math.sqrt((cx_bigest_big[0] - cx_bigest_d_last5[j]) ** 2 + (cy_biggest_big[0] - cy_biggest_d_last5[j]) ** 2) for j in range(len(cy_biggest_d_last5))]
ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d) ind_largest = len(cx_bigest_d) -5 + np.argmin(dists_d)
else:
cx_bigest_d_last5 = cx_bigest_d[-len(cx_bigest_d):]
cy_biggest_d_last5 = cy_biggest_d[-len(cx_bigest_d):]
dists_d = [math.sqrt((cx_bigest_big[0]-cx_bigest_d_last5[j])**2 + (cy_biggest_big[0]-cy_biggest_d_last5[j])**2) for j in range(len(cy_biggest_d_last5))]
ind_largest = len(cx_bigest_d) - len(cx_bigest_d) + np.argmin(dists_d)
cx_bigest_d_big[0] = cx_bigest_d[ind_largest] cx_bigest_d_big[0] = cx_bigest_d[ind_largest]
cy_biggest_d_big[0] = cy_biggest_d[ind_largest] cy_biggest_d_big[0] = cy_biggest_d[ind_largest]
except Exception as why: except Exception as why:

@@ -2116,13 +2116,17 @@ def return_boxes_of_images_by_order_of_reading_new(splitter_y_new, regions_witho
x_start_by_order=[] x_start_by_order=[]
x_end_by_order=[] x_end_by_order=[]
if len(x_starting)>0: if len(x_starting)>0:
all_columns = np.array(range(len(peaks_neg_tot)-1))
columns_covered_by_lines_covered_more_than_2col=[] columns_covered_by_lines_covered_more_than_2col=[]
for dj in range(len(x_starting)): for dj in range(len(x_starting)):
if set( list(np.array(range(x_starting[dj],x_ending[dj])) ) ) == set(all_columns):
pass
else:
columns_covered_by_lines_covered_more_than_2col=columns_covered_by_lines_covered_more_than_2col+list(np.array(range(x_starting[dj],x_ending[dj])) ) columns_covered_by_lines_covered_more_than_2col=columns_covered_by_lines_covered_more_than_2col+list(np.array(range(x_starting[dj],x_ending[dj])) )
columns_covered_by_lines_covered_more_than_2col=list(set(columns_covered_by_lines_covered_more_than_2col)) columns_covered_by_lines_covered_more_than_2col=list(set(columns_covered_by_lines_covered_more_than_2col))
all_columns=np.array(range(len(peaks_neg_tot)-1))
columns_not_covered=list( set(all_columns)-set(columns_covered_by_lines_covered_more_than_2col) ) columns_not_covered=list( set(all_columns)-set(columns_covered_by_lines_covered_more_than_2col) )

@@ -46,16 +46,15 @@ def xml_reading_order(page, order_of_texts, id_of_texts, id_of_marginalia):
region_order_sub = ET.SubElement(region_order, 'OrderedGroup') region_order_sub = ET.SubElement(region_order, 'OrderedGroup')
region_order_sub.set('id', "ro357564684568544579089") region_order_sub.set('id', "ro357564684568544579089")
region_counter = EynollahIdCounter() region_counter = EynollahIdCounter()
indexer_region = 0
for idx_textregion, _ in enumerate(order_of_texts): for idx_textregion, _ in enumerate(order_of_texts):
name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region)) name.set('index', str(region_counter.get('region')))
name.set('regionRef', region_counter.region_id(order_of_texts[idx_textregion])) name.set('regionRef', region_counter.region_id(order_of_texts[idx_textregion]))
indexer_region += 1
for id_marginal in id_of_marginalia: for id_marginal in id_of_marginalia:
name = ET.SubElement(region_order_sub, 'RegionRefIndexed') name = ET.SubElement(region_order_sub, 'RegionRefIndexed')
name.set('index', str(indexer_region)) name.set('index', str(region_counter.get('region')))
name.set('regionRef', id_marginal) name.set('regionRef', id_marginal)
region_counter.inc('region')
def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point): def order_and_id_of_texts(found_polygons_text_region, found_polygons_text_region_h, matrix_of_orders, indexes_sorted, index_of_types, kind_of_texts, ref_point):
indexes_sorted = np.array(indexes_sorted) indexes_sorted = np.array(indexes_sorted)

Loading…
Cancel
Save