From 576e120ba67c221f6a917c6971bd69be6a4b2f86 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Fri, 13 Mar 2026 18:15:30 +0100 Subject: [PATCH] =?UTF-8?q?autosized=20prediction=20is=20only=20faster=20f?= =?UTF-8?q?or=20=5Fpatched,=20not=20for=20=5Fresized=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When 338c4a0e wrapped all prediction models for automatic image size adaptation in CUDA, - tiling (`_patched`) was indeed faster - whole (`_resized`) was actually slower So this reverts the latter part. --- src/eynollah/eynollah.py | 22 ++++++++++++++-------- src/eynollah/model_zoo/model_zoo.py | 5 +++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/eynollah/eynollah.py b/src/eynollah/eynollah.py index fcc92f0..683fdba 100644 --- a/src/eynollah/eynollah.py +++ b/src/eynollah/eynollah.py @@ -889,8 +889,8 @@ class Eynollah: self.logger.debug("enter extract_text_regions") img_height_h = img.shape[0] img_width_h = img.shape[1] - #model_name = "region_fl" if patches else "region_fl_np" - model_name = "region_fl_patched" if patches else "region_fl_np_resized" + #model_name = "region_fl_patched" if patches else "region_fl_np_resized" + model_name = "region_fl_patched" if patches else "region_fl_np" model_region = self.model_zoo.get(model_name) thresholding_for_heading = True @@ -911,9 +911,15 @@ class Eynollah: else: img = resize_image(img, int(img_height_h * 2500 / float(img_width_h)), 2500).astype(np.uint8) - prediction_regions, _ = self.do_prediction_new_concept_autosize( - img, model_region, - thresholding_for_heading=thresholding_for_heading) + if patches: + prediction_regions, _ = self.do_prediction_new_concept_autosize( + img, model_region, + thresholding_for_heading=True) + else: + prediction_regions = self.do_prediction( + False, img, model_region, + n_batch_inference=2, + thresholding_for_heading=False) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) self.logger.debug("exit extract_text_regions") return prediction_regions @@ -1125,8 +1131,8 @@ class Eynollah: prediction_regions_org = np.zeros((img_height_org, img_width_org), dtype=np.uint8) confidence_matrix = np.zeros((img_height_org, img_width_org)) prediction_regions_page, confidence_matrix_page = \ - self.do_prediction_new_concept_autosize( - image['img_page'], self.model_zoo.get("region_1_2_resized"), + self.do_prediction_new_concept( + False, image['img_page'], self.model_zoo.get("region_1_2"), thresholding_for_artificial_class=True, threshold_art_class=self.threshold_art_class_layout) ys = slice(*image['coord_page'][0:2]) @@ -1523,7 +1529,7 @@ class Eynollah: return image_revised_last def get_tables_from_model(self, img): - prediction_table, _ = self.do_prediction_new_concept_autosize(img, self.model_zoo.get("table_resized")) + prediction_table, _ = self.do_prediction_new_concept(False, img, self.model_zoo.get("table")) prediction_table = prediction_table.astype(np.uint8) return prediction_table diff --git a/src/eynollah/model_zoo/model_zoo.py b/src/eynollah/model_zoo/model_zoo.py index 8638b65..8060006 100644 --- a/src/eynollah/model_zoo/model_zoo.py +++ b/src/eynollah/model_zoo/model_zoo.py @@ -143,8 +143,9 @@ class EynollahModelZoo: ) model._name = model_category self._loaded[model_category] = model - if model_category in ['region_1_2', 'table', 'region_fl_np']: - self._loaded[model_category + '_resized'] = wrap_layout_model_resized(model) + # autosized for full page images is too slow (better resize on CPU in numpy): + # if model_category in ['region_1_2', 'table', 'region_fl_np']: + # self._loaded[model_category + '_resized'] = wrap_layout_model_resized(model) if model_category in ['region_1_2', 'textline']: self._loaded[model_category + '_patched'] = wrap_layout_model_patched(model) return model # type: ignore