From 75796b5c0c70f52e14adb1bb3c21baf1ba7699f6 Mon Sep 17 00:00:00 2001 From: Kai Date: Fri, 10 Jun 2022 10:00:32 +0200 Subject: [PATCH] refactor --- README.md | 37 ++++++++++++++++++++++++++++++++++++- requirements.txt | 2 +- setup.py | 2 +- tsvtools/cli.py | 1 + 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 925edca..6ecae18 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,11 @@ # TSV - Processing Tools +Create .tsv files that can be viewed and edited with [neat](https://github.com/qurator-spk/neat). + ## Installation: +Clone this project and the [SBB-utils](https://github.com/qurator-spk/sbb_utils). + Setup virtual environment: ``` virtualenv --python=python3.6 venv @@ -19,7 +23,8 @@ pip install -U pip Install package together with its dependencies in development mode: ``` -pip install -e ./ +pip install -e sbb_utils +pip install -e page2tsv ``` ## PAGE-XML to TSV Transformation: @@ -59,3 +64,33 @@ Create a URL-annotated TSV file from an existing TSV file: ``` annotate-tsv enp_DE.tsv enp_DE-annotated.tsv ``` + +# Command-line interface: + +``` +page2tsv [OPTIONS] PAGE_XML_FILE TSV_OUT_FILE + +Options: + --purpose [NERD|OCR] Purpose of output tsv file. + + NERD: NER/NED application/ground-truth creation. + + OCR: OCR application/ground-truth creation. + + default: NERD. + --image-url TEXT + --ner-rest-endpoint TEXT REST endpoint of sbb_ner service. See + https://github.com/qurator-spk/sbb_ner for + details. Only applicable in case of NERD. + --ned-rest-endpoint TEXT REST endpoint of sbb_ned service. See + https://github.com/qurator-spk/sbb_ned for + details. Only applicable in case of NERD. + --noproxy disable proxy. default: enabled. + --scale-factor FLOAT default: 1.0 + --ned-threshold FLOAT + --min-confidence FLOAT + --max-confidence FLOAT + --ned-priority INTEGER + --help Show this message and exit. + +``` \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b7eb78b..5606eca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ ocrd >= 2.23.2 pandas matplotlib -qurator-sbb-tools \ No newline at end of file +qurator-sbb-utils \ No newline at end of file diff --git a/setup.py b/setup.py index eba3415..1e0ebf1 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open('requirements.txt') as fp: setup( name="tsvtools", - version="0.0.1", + version="0.0.2", author="", author_email="qurator@sbb.spk-berlin.de", description="neath", diff --git a/tsvtools/cli.py b/tsvtools/cli.py index 2390aef..a62c06c 100644 --- a/tsvtools/cli.py +++ b/tsvtools/cli.py @@ -19,6 +19,7 @@ from .ocr import get_conf_color from qurator.utils.ner import ner from qurator.utils.ned import ned + @click.command() @click.argument('tsv-file', type=click.Path(exists=True), required=True, nargs=1) @click.argument('url-file', type=click.Path(exists=False), required=True, nargs=1)