From 4e7e1c06b95e6a761f2232350fa4946452e93be2 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Wed, 3 Jun 2026 20:51:56 +0200 Subject: [PATCH] =?UTF-8?q?trocr=20variant=20for=20Predictor=20runtime:=20?= =?UTF-8?q?no=20model=20size=20for=20input=5Fshape=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because the transformers v4 and v5 APIs for the image preprocessor differ, and the model-internal image input sizes are actually irrelevant, because the preprocessor will resize them anyway, and there is no batch dimension (because the input images will have different shapes), do not advertise this information in `.input_shape`. --- src/eynollah/model_zoo/model_zoo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eynollah/model_zoo/model_zoo.py b/src/eynollah/model_zoo/model_zoo.py index 0dd24a8..49ed8e1 100644 --- a/src/eynollah/model_zoo/model_zoo.py +++ b/src/eynollah/model_zoo/model_zoo.py @@ -379,9 +379,9 @@ class EynollahModelZoo: return text, conf model.predict_on_batch = predict_torch # not actually needed (image processor does resize itself) + # no batch dimension (images passed as list w/ varying shapes) model.input_shape = (None, - proc.image_processor.size.height, - proc.image_processor.size.width, + None, len(proc.image_processor.image_mean)) return model