mirror of
https://github.com/qurator-spk/eynollah.git
synced 2025-12-01 08:44:13 +01:00
models: split into layout, extra and ocr
layout: Everything not OCR or extra
ocr: trocr/cnnrnn models
extra: obsolete or niche models
This commit is contained in:
parent
000af16a47
commit
095b36c389
1 changed files with 22 additions and 22 deletions
|
|
@ -4,7 +4,7 @@ from .specs import EynollahModelSpec, EynollahModelSpecSet
|
|||
ZENODO = "https://zenodo.org/records/17295988/files"
|
||||
MODELS_VERSION = "v0_7_0"
|
||||
|
||||
def dist_url(dist_name: str) -> str:
|
||||
def dist_url(dist_name: str="layout") -> str:
|
||||
return f'{ZENODO}/models_{dist_name}_{MODELS_VERSION}.zip'
|
||||
|
||||
DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
||||
|
|
@ -14,7 +14,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
variant='',
|
||||
filename="models_eynollah/eynollah-enhancement_20210425",
|
||||
dists=['enhancement', 'layout', 'ci'],
|
||||
dist_url=dist_url("enhancement"),
|
||||
dist_url=dist_url(),
|
||||
type='Keras',
|
||||
),
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
variant='hybrid',
|
||||
filename="models_eynollah/eynollah-binarization-hybrid_20230504/model_bin_hybrid_trans_cnn_sbb_ens",
|
||||
dists=['layout', 'binarization', ],
|
||||
dist_url=dist_url("binarization"),
|
||||
dist_url=dist_url(),
|
||||
type='Keras',
|
||||
),
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
variant='20210309',
|
||||
filename="models_eynollah/eynollah-binarization_20210309",
|
||||
dists=['binarization'],
|
||||
dist_url=dist_url("binarization"),
|
||||
dist_url=dist_url("extra"),
|
||||
type='Keras',
|
||||
),
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
variant='',
|
||||
filename="models_eynollah/eynollah-binarization_20210425",
|
||||
dists=['binarization'],
|
||||
dist_url=dist_url("binarization"),
|
||||
dist_url=dist_url("extra"),
|
||||
type='Keras',
|
||||
),
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="col_classifier",
|
||||
variant='',
|
||||
filename="models_eynollah/eynollah-column-classifier_20210425",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -58,7 +58,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="page",
|
||||
variant='',
|
||||
filename="models_eynollah/model_eynollah_page_extraction_20250915",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -67,7 +67,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="region",
|
||||
variant='',
|
||||
filename="models_eynollah/eynollah-main-regions-ensembled_20210425",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -76,7 +76,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="region",
|
||||
variant='extract_only_images',
|
||||
filename="models_eynollah/eynollah-main-regions_20231127_672_org_ens_11_13_16_17_18",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -85,7 +85,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="region",
|
||||
variant='light',
|
||||
filename="models_eynollah/eynollah-main-regions_20220314",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
help="early layout",
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
|
|
@ -95,7 +95,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="region_p2",
|
||||
variant='',
|
||||
filename="models_eynollah/eynollah-main-regions-aug-rotation_20210425",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
help="early layout, non-light, 2nd part",
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
|
|
@ -110,7 +110,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
#filename="models_eynollah/modelens_1_2_4_5_early_lay_1_2_spaltige",
|
||||
#filename="models_eynollah/model_3_eraly_layout_no_patches_1_2_spaltige",
|
||||
filename="models_eynollah/modelens_e_l_all_sp_0_1_2_3_4_171024",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url("all"),
|
||||
dists=['layout'],
|
||||
help="early layout, light, 1-or-2-column",
|
||||
type='Keras',
|
||||
|
|
@ -126,7 +126,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
#'filename="models_eynollah/modelens_full_lay_1_2_221024",
|
||||
#'filename="models_eynollah/eynollah-full-regions-1column_20210425",
|
||||
filename="models_eynollah/modelens_full_lay_1__4_3_091124",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
help="full layout / no patches",
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
|
|
@ -146,7 +146,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
# filename="models_eynollah/modelens_full_layout_24_till_28",
|
||||
# filename="models_eynollah/model_2_full_layout_new_trans",
|
||||
filename="models_eynollah/modelens_full_lay_1__4_3_091124",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
help="full layout / with patches",
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
|
|
@ -161,7 +161,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
#filename="models_eynollah/model_mb_ro_aug_ens_8",
|
||||
#filename="models_eynollah/model_ens_reading_order_machine_based",
|
||||
filename="models_eynollah/model_eynollah_reading_order_20250824",
|
||||
dist_url=dist_url("reading_order"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout', 'reading_order'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -176,7 +176,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
#filename="models_eynollah/modelens_textline_9_12_13_14_15",
|
||||
#filename="models_eynollah/eynollah-textline_20210425",
|
||||
filename="models_eynollah/modelens_textline_0_1__2_4_16092024",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -186,7 +186,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
variant='light',
|
||||
#filename="models_eynollah/eynollah-textline_light_20210425",
|
||||
filename="models_eynollah/modelens_textline_0_1__2_4_16092024",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -195,7 +195,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="table",
|
||||
variant='',
|
||||
filename="models_eynollah/eynollah-tables_20210319",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -204,7 +204,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="table",
|
||||
variant='light',
|
||||
filename="models_eynollah/modelens_table_0t4_201124",
|
||||
dist_url=dist_url("layout"),
|
||||
dist_url=dist_url(),
|
||||
dists=['layout'],
|
||||
type='Keras',
|
||||
),
|
||||
|
|
@ -250,7 +250,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="ocr",
|
||||
variant='tr',
|
||||
filename="models_eynollah/model_eynollah_ocr_trocr_20250919",
|
||||
dist_url=dist_url("trocr"),
|
||||
dist_url=dist_url("ocr"),
|
||||
help='much slower transformer-based',
|
||||
dists=['trocr'],
|
||||
type='Keras',
|
||||
|
|
@ -260,7 +260,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="trocr_processor",
|
||||
variant='',
|
||||
filename="models_eynollah/model_eynollah_ocr_trocr_20250919",
|
||||
dist_url=dist_url("trocr"),
|
||||
dist_url=dist_url("ocr"),
|
||||
dists=['trocr'],
|
||||
type='TrOCRProcessor',
|
||||
),
|
||||
|
|
@ -269,7 +269,7 @@ DEFAULT_MODEL_SPECS = EynollahModelSpecSet([
|
|||
category="trocr_processor",
|
||||
variant='htr',
|
||||
filename="models_eynollah/microsoft/trocr-base-handwritten",
|
||||
dist_url=dist_url("trocr"),
|
||||
dist_url=dist_url("extra"),
|
||||
dists=['trocr'],
|
||||
type='TrOCRProcessor',
|
||||
),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue