Reviewer notes (free text before the first "diff --git" line; not part of any hunk).

This is a re-rolled version of the reviewed patch with the following fixes:
  * pyproject.toml: package discovery uses "qurator*" so that the sub-package
    qurator.eynollah (target of the console scripts) is actually packaged.
  * pyproject.toml: the "ocrd-eynollah-segment" script, the "*.json" package
    data and the readme/author/URL metadata from the deleted setup.py are kept.
  * cli.py: "os" is imported where machine_based_reading_order uses it.
  * cli.py: "--log-level" stays available next to the new "--log_level".
  * cli.py: the multi-line signature of the layout command is preserved and no
    blank lines are left between decorators, which keeps the diff small.
The reviewed copy had its line breaks and indentation collapsed; whitespace of
context/removed lines below was reconstructed and must be checked against the
tree before applying. "index" lines were dropped for files whose new content
changed, because the recorded post-image blob ids no longer apply.

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,37 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "eynollah"
+version = "1.2.3"
+readme = "README.md"
+authors = [{name = "Vahid Rezanezhad"}]
+# ocrd includes opencv, numpy, shapely, click
+dependencies = [
+    "ocrd >= 2.23.3",
+    "tensorflow >= 2.12.0",
+    "scikit-learn >= 0.23.2",
+    "imutils >= 0.5.3",
+    "numpy < 1.24.0",
+    "matplotlib",
+    "torch == 2.0.1",
+    "transformers == 4.30.2",
+    "numba == 0.58.1",
+]
+
+[project.urls]
+Homepage = "https://github.com/qurator-spk/eynollah"
+
+[project.scripts]
+eynollah = "qurator.eynollah.cli:main"
+ocrd-eynollah-segment = "qurator.eynollah.ocrd_cli:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
+# The trailing "*" is required so that sub-packages (qurator.eynollah) match.
+include = ["qurator*"]
+
+[tool.setuptools.package-data]
+# Ship bundled JSON files with the package, as the former setup.py did.
+"*" = ["*.json"]
diff --git a/qurator/eynollah/cli.py b/qurator/eynollah/cli.py
--- a/qurator/eynollah/cli.py
+++ b/qurator/eynollah/cli.py
@@ -3,14 +3,70 @@ import click
 from ocrd_utils import initLogging, setOverrideLogLevel
 from qurator.eynollah.eynollah import Eynollah
 
 
-@click.command()
+@click.group()
+def main():
+    """Eynollah command-line interface; see the sub-commands below."""
+
+
+@main.command()
+@click.option(
+    "--dir_xml",
+    "-dx",
+    help="directory of GT page-xml files",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--dir_out_modal_image",
+    "-domi",
+    help="directory where ground truth images would be written",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--dir_out_classes",
+    "-docl",
+    help="directory where ground truth classes would be written",
+    type=click.Path(exists=True, file_okay=False),
+)
+@click.option(
+    "--input_height",
+    "-ih",
+    help="input height",
+)
+@click.option(
+    "--input_width",
+    "-iw",
+    help="input width",
+)
+@click.option(
+    "--min_area_size",
+    "-min",
+    help="min area size of regions considered for reading order training.",
+)
+def machine_based_reading_order(
+    dir_xml,
+    dir_out_modal_image,
+    dir_out_classes,
+    input_height,
+    input_width,
+    min_area_size,
+):
+    """Generate ground truth for reading-order training from PAGE-XML files (work in progress)."""
+    # Local import: `os` is not among the module-level imports visible in this hunk.
+    import os
+
+    # Stub: only the listing of the GT directory exists so far; the output
+    # directories and the size options are accepted but not used yet.
+    xml_files_ind = os.listdir(dir_xml)
+
+
+@main.command()
 @click.option(
     "--image",
     "-i",
     help="image filename",
     type=click.Path(exists=True, dir_okay=False),
 )
 @click.option(
     "--out",
     "-o",
@@ -146,37 +202,38 @@ from qurator.eynollah.eynollah import Eynollah
     help="if this parameter set to true, this tool will try to do ocr",
 )
 @click.option(
+    "--log_level",
     "--log-level",
     "-l",
     type=click.Choice(['OFF', 'DEBUG', 'INFO', 'WARN', 'ERROR']),
     help="Override log level globally to this",
 )
-def main(
+def layout(
     image,
     out,
     dir_in,
     model,
     save_images,
     save_layout,
     save_deskewed,
     save_all,
     save_page,
     enable_plotting,
     allow_enhancement,
     curved_line,
     textline_light,
     full_layout,
     tables,
     right2left,
     input_binary,
     allow_scaling,
     headers_off,
     light_version,
     reading_order_machine_based,
     do_ocr,
     ignore_page_extraction,
     log_level
 ):
     if log_level:
         setOverrideLogLevel(log_level)
     initLogging()
@@ -215,8 +272,6 @@ def main(
         do_ocr=do_ocr,
     )
     eynollah.run()
-    #pcgts = eynollah.run()
-    ##eynollah.writer.write_pagexml(pcgts)
 
 if __name__ == "__main__":
     main()
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 530dac2..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# ocrd includes opencv, numpy, shapely, click
-ocrd >= 2.23.3
-numpy <1.24.0
-scikit-learn >= 0.23.2
-tensorflow >=2.12.0
-imutils >= 0.5.3
-matplotlib
-setuptools >= 50
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 9abf158..0000000
--- a/setup.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from setuptools import setup, find_packages
-from json import load
-
-install_requires = open('requirements.txt').read().split('\n')
-with open('ocrd-tool.json', 'r', encoding='utf-8') as f:
-    version = load(f)['version']
-
-setup(
-    name='eynollah',
-    version=version,
-    long_description=open('README.md').read(),
-    long_description_content_type='text/markdown',
-    author='Vahid Rezanezhad',
-    url='https://github.com/qurator-spk/eynollah',
-    license='Apache License 2.0',
-    namespace_packages=['qurator'],
-    packages=find_packages(exclude=['tests']),
-    install_requires=install_requires,
-    package_data={
-        '': ['*.json']
-    },
-    entry_points={
-        'console_scripts': [
-            'eynollah=qurator.eynollah.cli:main',
-            'ocrd-eynollah-segment=qurator.eynollah.ocrd_cli:main',
-        ]
-    },
-)