From 095b36c3896429143d19458f0c8f682587c1306f Mon Sep 17 00:00:00 2001 From: kba Date: Wed, 26 Nov 2025 19:45:58 +0100 Subject: [PATCH] models: split into layout, extra and ocr layout: Everything not OCR or extra ocr: trocr/cnnrnn models extra: obsolete or niche models --- src/eynollah/model_zoo/default_specs.py | 44 ++++++++++++------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/eynollah/model_zoo/default_specs.py b/src/eynollah/model_zoo/default_specs.py index a720fa0..8138ec5 100644 --- a/src/eynollah/model_zoo/default_specs.py +++ b/src/eynollah/model_zoo/default_specs.py @@ -4,7 +4,7 @@ from .specs import EynollahModelSpec, EynollahModelSpecSet ZENODO = "https://zenodo.org/records/17295988/files" MODELS_VERSION = "v0_7_0" -def dist_url(dist_name: str) -> str: +def dist_url(dist_name: str="layout") -> str: return f'{ZENODO}/models_{dist_name}_{MODELS_VERSION}.zip' DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ @@ -14,7 +14,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ variant='', filename="models_eynollah/eynollah-enhancement_20210425", dists=['enhancement', 'layout', 'ci'], - dist_url=dist_url("enhancement"), + dist_url=dist_url(), type='Keras', ), @@ -23,7 +23,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ variant='hybrid', filename="models_eynollah/eynollah-binarization-hybrid_20230504/model_bin_hybrid_trans_cnn_sbb_ens", dists=['layout', 'binarization', ], - dist_url=dist_url("binarization"), + dist_url=dist_url(), type='Keras', ), @@ -32,7 +32,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ variant='20210309', filename="models_eynollah/eynollah-binarization_20210309", dists=['binarization'], - dist_url=dist_url("binarization"), + dist_url=dist_url("extra"), type='Keras', ), @@ -41,7 +41,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ variant='', filename="models_eynollah/eynollah-binarization_20210425", dists=['binarization'], - dist_url=dist_url("binarization"), + dist_url=dist_url("extra"), type='Keras', ), @@ -49,7 +49,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="col_classifier", variant='', filename="models_eynollah/eynollah-column-classifier_20210425", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -58,7 +58,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="page", variant='', filename="models_eynollah/model_eynollah_page_extraction_20250915", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -67,7 +67,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="region", variant='', filename="models_eynollah/eynollah-main-regions-ensembled_20210425", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -76,7 +76,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="region", variant='extract_only_images', filename="models_eynollah/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -85,7 +85,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="region", variant='light', filename="models_eynollah/eynollah-main-regions_20220314", - dist_url=dist_url("layout"), + dist_url=dist_url(), help="early layout", dists=['layout'], type='Keras', @@ -95,7 +95,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="region_p2", variant='', filename="models_eynollah/eynollah-main-regions-aug-rotation_20210425", - dist_url=dist_url("layout"), + dist_url=dist_url(), help="early layout, non-light, 2nd part", dists=['layout'], type='Keras', @@ -110,7 +110,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ #filename="models_eynollah/modelens_1_2_4_5_early_lay_1_2_spaltige", #filename="models_eynollah/model_3_eraly_layout_no_patches_1_2_spaltige", filename="models_eynollah/modelens_e_l_all_sp_0_1_2_3_4_171024", - dist_url=dist_url("layout"), + dist_url=dist_url("all"), dists=['layout'], help="early layout, light, 1-or-2-column", type='Keras', @@ -126,7 +126,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ #'filename="models_eynollah/modelens_full_lay_1_2_221024", #'filename="models_eynollah/eynollah-full-regions-1column_20210425", filename="models_eynollah/modelens_full_lay_1__4_3_091124", - dist_url=dist_url("layout"), + dist_url=dist_url(), help="full layout / no patches", dists=['layout'], type='Keras', @@ -146,7 +146,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ # filename="models_eynollah/modelens_full_layout_24_till_28", # filename="models_eynollah/model_2_full_layout_new_trans", filename="models_eynollah/modelens_full_lay_1__4_3_091124", - dist_url=dist_url("layout"), + dist_url=dist_url(), help="full layout / with patches", dists=['layout'], type='Keras', @@ -161,7 +161,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ #filename="models_eynollah/model_mb_ro_aug_ens_8", #filename="models_eynollah/model_ens_reading_order_machine_based", filename="models_eynollah/model_eynollah_reading_order_20250824", - dist_url=dist_url("reading_order"), + dist_url=dist_url(), dists=['layout', 'reading_order'], type='Keras', ), @@ -176,7 +176,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ #filename="models_eynollah/modelens_textline_9_12_13_14_15", #filename="models_eynollah/eynollah-textline_20210425", filename="models_eynollah/modelens_textline_0_1__2_4_16092024", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -186,7 +186,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ variant='light', #filename="models_eynollah/eynollah-textline_light_20210425", filename="models_eynollah/modelens_textline_0_1__2_4_16092024", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -195,7 +195,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="table", variant='', filename="models_eynollah/eynollah-tables_20210319", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -204,7 +204,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="table", variant='light', filename="models_eynollah/modelens_table_0t4_201124", - dist_url=dist_url("layout"), + dist_url=dist_url(), dists=['layout'], type='Keras', ), @@ -250,7 +250,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="ocr", variant='tr', filename="models_eynollah/model_eynollah_ocr_trocr_20250919", - dist_url=dist_url("trocr"), + dist_url=dist_url("ocr"), help='much slower transformer-based', dists=['trocr'], type='Keras', @@ -260,7 +260,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="trocr_processor", variant='', filename="models_eynollah/model_eynollah_ocr_trocr_20250919", - dist_url=dist_url("trocr"), + dist_url=dist_url("ocr"), dists=['trocr'], type='TrOCRProcessor', ), @@ -269,7 +269,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([ category="trocr_processor", variant='htr', filename="models_eynollah/microsoft/trocr-base-handwritten", - dist_url=dist_url("trocr"), + dist_url=dist_url("extra"), dists=['trocr'], type='TrOCRProcessor', ),