do_prediction*: remove autosized variants, simplify

This commit is contained in:
Robert Sachunsky 2026-04-20 17:17:43 +02:00
parent de65a55a04
commit 829256df91

View file

@@ -812,6 +812,10 @@ class Eynollah:
gc.collect() gc.collect()
return prediction, confidence return prediction, confidence
# variant of do_prediction_new_concept with no need
# for resizing or tiling into patches - done on model
# (Tensorflow/CUDA) side
# (after loading wrapped resized or patched model)
def do_prediction_new_concept_autosize( def do_prediction_new_concept_autosize(
self, img, model, self, img, model,
n_batch_inference=None, n_batch_inference=None,
@@ -820,7 +824,6 @@ class Eynollah:
threshold_art_class=0.1, threshold_art_class=0.1,
artificial_class=4, artificial_class=4,
): ):
self.logger.debug("enter do_prediction_new_concept (%s)", model.name) self.logger.debug("enter do_prediction_new_concept (%s)", model.name)
img = img / 255.0 img = img / 255.0
img = img.astype(np.float16) img = img.astype(np.float16)
@@ -904,16 +907,11 @@ class Eynollah:
img_height_h = img.shape[0] img_height_h = img.shape[0]
img_width_h = img.shape[1] img_width_h = img.shape[1]
if patches: prediction_regions, confidence_regions = self.do_prediction_new_concept(
# prediction_regions, confidence_regions = self.do_prediction_new_concept_autosize( patches, img, self.model_zoo.get("region_fl" if patches else "region_fl_np"),
# img, self.model_zoo.get("region_fl_patched")) n_batch_inference=1,
prediction_regions, confidence_regions = self.do_prediction_new_concept( thresholding_for_heading=not patches)
True, img, self.model_zoo.get("region_fl"),
n_batch_inference=1)
else:
prediction_regions, confidence_regions = self.do_prediction_new_concept(
False, img, self.model_zoo.get("region_fl_np"),
thresholding_for_heading=True)
self.logger.debug("exit extract_text_regions_new") self.logger.debug("exit extract_text_regions_new")
return prediction_regions, confidence_regions return prediction_regions, confidence_regions
@@ -921,9 +919,10 @@ class Eynollah:
self.logger.debug("enter extract_text_regions") self.logger.debug("enter extract_text_regions")
img_height_h = img.shape[0] img_height_h = img.shape[0]
img_width_h = img.shape[1] img_width_h = img.shape[1]
model_region = self.model_zoo.get("region_fl") if patches else self.model_zoo.get("region_fl_np") model_region = self.model_zoo.get("region_fl" if patches else "region_fl_np")
prediction_regions = self.do_prediction(patches, img, model_region, marginal_of_patch_percent=0.1) prediction_regions = self.do_prediction(patches, img, model_region,
marginal_of_patch_percent=0.1)
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
self.logger.debug("exit extract_text_regions") self.logger.debug("exit extract_text_regions")
return prediction_regions return prediction_regions
@@ -1059,18 +1058,19 @@ class Eynollah:
def textline_contours(self, img, use_patches): def textline_contours(self, img, use_patches):
self.logger.debug('enter textline_contours') self.logger.debug('enter textline_contours')
kwargs = dict(artificial_class=2, if (self.tables or
n_batch_inference=1 if self.tables else 3, # avoid OOM self.reading_order_machine_based or
thresholding_for_artificial_class=True, self.input_binary):
threshold_art_class=self.threshold_art_class_textline) # avoid OOM
if use_patches: n_batch = 1
# prediction_textline, conf_textline = self.do_prediction_new_concept_autosize(
# img, self.model_zoo.get("textline_patched"), **kwargs)
prediction_textline, conf_textline = self.do_prediction_new_concept(
True, img, self.model_zoo.get("textline"), **kwargs)
else: else:
prediction_textline, conf_textline = self.do_prediction_new_concept( n_batch = 3
False, img, self.model_zoo.get("textline"), **kwargs) prediction_textline, conf_textline = self.do_prediction_new_concept(
use_patches, img, self.model_zoo.get("textline"),
artificial_class=2,
n_batch_inference=n_batch,
thresholding_for_artificial_class=True,
threshold_art_class=self.threshold_art_class_textline)
#prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline")) #prediction_textline_longshot = self.do_prediction(False, img, self.model_zoo.get("textline"))
@@ -1109,9 +1109,9 @@ class Eynollah:
img_w_new = 4000 img_w_new = 4000
img_h_new = img_w_new * img.shape[0] // img.shape[1] img_h_new = img_w_new * img.shape[0] // img.shape[1]
img_resized = resize_image(img, img_h_new, img_w_new) img_resized = resize_image(img, img_h_new, img_w_new)
self.logger.debug("detecting textlines on %s with %d colors", self.logger.debug("detecting textlines on %s with %d colors",
str(img_resized.shape), len(np.unique(img_resized))) str(img_resized.shape), len(np.unique(img_resized)))
textline_mask_tot_ea, confidence_textline = self.run_textline(img_resized) textline_mask_tot_ea, confidence_textline = self.run_textline(img_resized)
textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h) textline_mask_tot_ea = resize_image(textline_mask_tot_ea, img_height_h, img_width_h)
confidence_textline = resize_image(confidence_textline, img_height_h, img_width_h) confidence_textline = resize_image(confidence_textline, img_height_h, img_width_h)
@@ -1123,26 +1123,14 @@ class Eynollah:
return None, erosion_hurts, None, None, textline_mask_tot_ea, None, None return None, erosion_hurts, None, None, textline_mask_tot_ea, None, None
#print("inside 2 ", time.time()-t_in) #print("inside 2 ", time.time()-t_in)
kwargs = dict(n_batch_inference=1,
thresholding_for_artificial_class=True,
threshold_art_class=self.threshold_art_class_layout,
separator_class=label_seps)
if num_col_classifier == 1 or num_col_classifier == 2: if num_col_classifier == 1 or num_col_classifier == 2:
if img_height_h / img_width_h > 2.5: if img_height_h / img_width_h > 2.5:
self.logger.debug("resized to %dx%d for %d cols", patches = True
img_resized.shape[1], img_resized.shape[0], num_col_classifier)
prediction_regions, confidence_regions = (
# self.do_prediction_new_concept_autosize(
# img_resized, self.model_zoo.get("region_1_2_patched"),
self.do_prediction_new_concept(
True, img_resized, self.model_zoo.get("region_1_2"),
**kwargs)
)
else: else:
prediction_regions, confidence_regions = \ patches = False
self.do_prediction_new_concept( self.logger.debug("resized to %dx%d for %d cols",
False, img_resized, self.model_zoo.get("region_1_2"), img_resized.shape[1], img_resized.shape[0],
**kwargs) num_col_classifier)
else: else:
new_w = (900+ (num_col_classifier-3)*100) new_w = (900+ (num_col_classifier-3)*100)
new_h = new_w * img.shape[0] // img.shape[1] new_h = new_w * img.shape[0] // img.shape[1]
@@ -1150,13 +1138,15 @@ class Eynollah:
self.logger.debug("resized to %dx%d (new_w=%d) for %d cols", self.logger.debug("resized to %dx%d (new_w=%d) for %d cols",
img_resized.shape[1], img_resized.shape[0], img_resized.shape[1], img_resized.shape[0],
new_w, num_col_classifier) new_w, num_col_classifier)
prediction_regions, confidence_regions = ( patches = True
# self.do_prediction_new_concept_autosize(
# img_resized, self.model_zoo.get("region_1_2_patched"), prediction_regions, confidence_regions = \
self.do_prediction_new_concept( self.do_prediction_new_concept(
True, img_resized, self.model_zoo.get("region_1_2"), patches, img_resized, self.model_zoo.get("region_1_2"),
**kwargs) n_batch_inference=1,
) thresholding_for_artificial_class=True,
threshold_art_class=self.threshold_art_class_layout,
separator_class=label_seps)
prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h) prediction_regions = resize_image(prediction_regions, img_height_h, img_width_h)
confidence_regions = resize_image(confidence_regions, img_height_h, img_width_h) confidence_regions = resize_image(confidence_regions, img_height_h, img_width_h)