mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-10-06 22:50:14 +02:00
get textlines sorted in textregion - verticals
This commit is contained in:
parent
6904a98182
commit
6d8641a518
1 changed file with 51 additions and 23 deletions
|
@ -1759,14 +1759,19 @@ class Eynollah:
|
||||||
if N==0:
|
if N==0:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
diff_matrix = np.abs(np.subtract.outer(cy_textline, cy_textline))
|
diff_cy = np.abs( np.diff(sorted(cy_textline)) )
|
||||||
|
diff_cx = np.abs(np.diff(sorted(cx_textline)) )
|
||||||
|
|
||||||
non_zero_diffs = diff_matrix[diff_matrix > 0]
|
|
||||||
if len(non_zero_diffs) == 0:
|
if len(diff_cy)>0:
|
||||||
mean_y_diff = 0
|
mean_y_diff = np.mean(diff_cy)
|
||||||
|
mean_x_diff = np.mean(diff_cx)
|
||||||
else:
|
else:
|
||||||
mean_y_diff = np.mean(non_zero_diffs)
|
mean_y_diff = 0
|
||||||
|
mean_x_diff = 0
|
||||||
|
|
||||||
|
|
||||||
|
if np.int(mean_y_diff) >= np.int(mean_x_diff):
|
||||||
row_threshold = mean_y_diff / 2 if mean_y_diff > 0 else 10
|
row_threshold = mean_y_diff / 2 if mean_y_diff > 0 else 10
|
||||||
|
|
||||||
indices_sorted_by_y = sorted(range(N), key=lambda i: cy_textline[i])
|
indices_sorted_by_y = sorted(range(N), key=lambda i: cy_textline[i])
|
||||||
|
@ -1789,6 +1794,29 @@ class Eynollah:
|
||||||
for idx in row_sorted:
|
for idx in row_sorted:
|
||||||
sorted_textlines.append(textlines_textregion[idx])
|
sorted_textlines.append(textlines_textregion[idx])
|
||||||
|
|
||||||
|
else:
|
||||||
|
row_threshold = mean_x_diff / 2 if mean_x_diff > 0 else 10
|
||||||
|
indices_sorted_by_x = sorted(range(N), key=lambda i: cx_textline[i])
|
||||||
|
|
||||||
|
rows = []
|
||||||
|
current_row = [indices_sorted_by_x[0]]
|
||||||
|
|
||||||
|
for i in range(1, N):
|
||||||
|
current_idy = indices_sorted_by_x[i]
|
||||||
|
prev_idy = current_row[0]
|
||||||
|
if abs(cx_textline[current_idy] - cx_textline[prev_idy] ) <= row_threshold:
|
||||||
|
current_row.append(current_idy)
|
||||||
|
else:
|
||||||
|
rows.append(current_row)
|
||||||
|
current_row = [current_idy]
|
||||||
|
rows.append(current_row)
|
||||||
|
|
||||||
|
sorted_textlines = []
|
||||||
|
for row in rows:
|
||||||
|
row_sorted = sorted(row , key=lambda i: cy_textline[i])
|
||||||
|
for idy in row_sorted:
|
||||||
|
sorted_textlines.append(textlines_textregion[idy])
|
||||||
|
|
||||||
return sorted_textlines
|
return sorted_textlines
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue