Adapt to NumPy 1.25 changes

(esp. `np.array(...)` is no longer allowed on ragged sequences unless
 `dtype=object` is passed, but that then coerces the sub-arrays to `object` as well)
This commit is contained in:
Robert Sachunsky 2026-01-20 04:04:07 +01:00
parent 9fdae72e96
commit e2754da4f5
3 changed files with 30 additions and 15 deletions

View file

@ -117,6 +117,7 @@ from .utils.marginals import get_marginals
from .utils.resize import resize_image from .utils.resize import resize_image
from .utils.shm import share_ndarray from .utils.shm import share_ndarray
from .utils import ( from .utils import (
ensure_array,
is_image_filename, is_image_filename,
boosting_headers_by_longshot_region_segmentation, boosting_headers_by_longshot_region_segmentation,
crop_image_inside_box, crop_image_inside_box,
@ -2475,8 +2476,8 @@ class Eynollah:
self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot): self, contours_only_text_parent, contours_only_text_parent_h, boxes, textline_mask_tot):
self.logger.debug("enter do_order_of_regions") self.logger.debug("enter do_order_of_regions")
contours_only_text_parent = np.array(contours_only_text_parent) contours_only_text_parent = ensure_array(contours_only_text_parent)
contours_only_text_parent_h = np.array(contours_only_text_parent_h) contours_only_text_parent_h = ensure_array(contours_only_text_parent_h)
boxes = np.array(boxes, dtype=int) # to be on the safe side boxes = np.array(boxes, dtype=int) # to be on the safe side
c_boxes = np.stack((0.5 * boxes[:, 2:4].sum(axis=1), c_boxes = np.stack((0.5 * boxes[:, 2:4].sum(axis=1),
0.5 * boxes[:, 0:2].sum(axis=1))) 0.5 * boxes[:, 0:2].sum(axis=1)))
@ -3987,7 +3988,7 @@ class Eynollah:
def filterfun(lis): def filterfun(lis):
if len(lis) == 0: if len(lis) == 0:
return [] return []
return list(np.array(lis)[indices]) return list(ensure_array(lis)[indices])
return (filterfun(contours_par), return (filterfun(contours_par),
filterfun(contours_textline), filterfun(contours_textline),
@ -4378,7 +4379,8 @@ class Eynollah:
areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent]) areas_cnt_text = np.array([cv2.contourArea(c) for c in contours_only_text_parent])
areas_cnt_text = areas_cnt_text / float(areas_tot_text) areas_cnt_text = areas_cnt_text / float(areas_tot_text)
#self.logger.info('areas_cnt_text %s', areas_cnt_text) #self.logger.info('areas_cnt_text %s', areas_cnt_text)
contours_only_text_parent = np.array(contours_only_text_parent)[areas_cnt_text > MIN_AREA_REGION] contours_only_text_parent = ensure_array(contours_only_text_parent)
contours_only_text_parent = contours_only_text_parent[areas_cnt_text > MIN_AREA_REGION]
areas_cnt_text_parent = areas_cnt_text[areas_cnt_text > MIN_AREA_REGION] areas_cnt_text_parent = areas_cnt_text[areas_cnt_text > MIN_AREA_REGION]
index_con_parents = np.argsort(areas_cnt_text_parent) index_con_parents = np.argsort(areas_cnt_text_parent)
@ -4397,12 +4399,13 @@ class Eynollah:
areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d]) areas_cnt_text_d = np.array([cv2.contourArea(c) for c in contours_only_text_parent_d])
areas_cnt_text_d = areas_cnt_text_d / float(areas_tot_text_d) areas_cnt_text_d = areas_cnt_text_d / float(areas_tot_text_d)
contours_only_text_parent_d = np.array(contours_only_text_parent_d)[areas_cnt_text_d > MIN_AREA_REGION] contours_only_text_parent_d = ensure_array(contours_only_text_parent_d)
contours_only_text_parent_d = contours_only_text_parent_d[areas_cnt_text_d > MIN_AREA_REGION]
areas_cnt_text_d = areas_cnt_text_d[areas_cnt_text_d > MIN_AREA_REGION] areas_cnt_text_d = areas_cnt_text_d[areas_cnt_text_d > MIN_AREA_REGION]
if len(contours_only_text_parent_d): if len(contours_only_text_parent_d):
index_con_parents_d = np.argsort(areas_cnt_text_d) index_con_parents_d = np.argsort(areas_cnt_text_d)
contours_only_text_parent_d = np.array(contours_only_text_parent_d)[index_con_parents_d] contours_only_text_parent_d = contours_only_text_parent_d[index_con_parents_d]
areas_cnt_text_d = areas_cnt_text_d[index_con_parents_d] areas_cnt_text_d = areas_cnt_text_d[index_con_parents_d]
centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d)) # [2, N] centers_d = np.stack(find_center_of_contours(contours_only_text_parent_d)) # [2, N]
@ -4546,9 +4549,10 @@ class Eynollah:
#print("text region early 3 in %.1fs", time.time() - t0) #print("text region early 3 in %.1fs", time.time() - t0)
if self.light_version: if self.light_version:
contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent) contours_only_text_parent = dilate_textregion_contours(contours_only_text_parent)
contours_only_text_parent , contours_only_text_parent_d_ordered = self.filter_contours_inside_a_bigger_one( contours_only_text_parent, contours_only_text_parent_d_ordered = \
contours_only_text_parent, contours_only_text_parent_d_ordered, text_only, self.filter_contours_inside_a_bigger_one(
marginal_cnts=polygons_of_marginals) contours_only_text_parent, contours_only_text_parent_d_ordered, text_only,
marginal_cnts=polygons_of_marginals)
#print("text region early 3.5 in %.1fs", time.time() - t0) #print("text region early 3.5 in %.1fs", time.time() - t0)
conf_contours_textregions = get_textregion_contours_in_org_image_light( conf_contours_textregions = get_textregion_contours_in_org_image_light(
contours_only_text_parent, self.image, confidence_matrix) contours_only_text_parent, self.image, confidence_matrix)

View file

@ -1,4 +1,4 @@
from typing import List, Tuple from typing import Iterable, List, Tuple
from logging import getLogger from logging import getLogger
import time import time
import math import math
@ -1929,3 +1929,11 @@ def is_image_filename(fname: str) -> bool:
def is_xml_filename(fname: str) -> bool: def is_xml_filename(fname: str) -> bool:
return fname.lower().endswith('.xml') return fname.lower().endswith('.xml')
def ensure_array(obj: Iterable) -> np.ndarray:
    """Return `obj` as an array whose items may have heterogeneous shapes.

    An existing `np.ndarray` is passed through unchanged. Any other iterable
    is packed item by item into a one-dimensional array of dtype `object`,
    so the inner arrays are stored as they are instead of being coerced to
    `object` themselves (which matters e.g. for a sequence of length 1).
    """
    if isinstance(obj, np.ndarray):
        return obj
    # one element per item of the iterable, whatever shape each item has
    return np.fromiter(obj, dtype=object)

View file

@ -12,6 +12,7 @@ from shapely import set_precision
from shapely.ops import unary_union, nearest_points from shapely.ops import unary_union, nearest_points
from .rotate import rotate_image, rotation_image_new from .rotate import rotate_image, rotation_image_new
from . import ensure_array
def contours_in_same_horizon(cy_main_hor): def contours_in_same_horizon(cy_main_hor):
""" """
@ -248,13 +249,15 @@ def return_contours_of_image(image):
return contours, hierarchy return contours, hierarchy
def dilate_textline_contours(all_found_textline_polygons): def dilate_textline_contours(all_found_textline_polygons):
return [[polygon2contour(contour2polygon(contour, dilate=6)) return [ensure_array(
for contour in region] [polygon2contour(contour2polygon(contour, dilate=6))
for contour in region])
for region in all_found_textline_polygons] for region in all_found_textline_polygons]
def dilate_textregion_contours(all_found_textline_polygons): def dilate_textregion_contours(all_found_textregion_polygons):
return [polygon2contour(contour2polygon(contour, dilate=6)) return ensure_array(
for contour in all_found_textline_polygons] [polygon2contour(contour2polygon(contour, dilate=6))
for contour in all_found_textregion_polygons])
def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0): def contour2polygon(contour: Union[np.ndarray, Sequence[Sequence[Sequence[Number]]]], dilate=0):
polygon = Polygon([point[0] for point in contour]) polygon = Polygon([point[0] for point in contour])