From 4e7e1c06b95e6a761f2232350fa4946452e93be2 Mon Sep 17 00:00:00 2001
From: Robert Sachunsky <sachunsky@informatik.uni-leipzig.de>
Date: Wed, 3 Jun 2026 20:51:56 +0200
Subject: [PATCH] =?UTF-8?q?trocr=20variant=20for=20Predictor=20runtime:=20?=
 =?UTF-8?q?no=20model=20size=20for=20input=5Fshape=E2=80=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because the image preprocessor API differs between transformers v4
and v5, because the model-internal image input sizes are actually
irrelevant (the preprocessor will resize the images anyway), and
because there is no batch dimension (the input images will have
different shapes), do not advertise this information in `.input_shape`.
---
 src/eynollah/model_zoo/model_zoo.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/eynollah/model_zoo/model_zoo.py b/src/eynollah/model_zoo/model_zoo.py
index 0dd24a8..49ed8e1 100644
--- a/src/eynollah/model_zoo/model_zoo.py
+++ b/src/eynollah/model_zoo/model_zoo.py
@@ -379,9 +379,9 @@ class EynollahModelZoo:
             return text, conf
         model.predict_on_batch = predict_torch
         # not actually needed (image processor does resize itself)
+        # no batch dimension (images passed as list w/ varying shapes)
         model.input_shape = (None,
-                             proc.image_processor.size.height,
-                             proc.image_processor.size.width,
+                             None,
                              len(proc.image_processor.image_mean))
         return model