From ccb7f34ab76429d4b16c7c32e591cb1507f82676 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike"
Date: Mon, 9 Dec 2019 14:28:07 +0100
Subject: [PATCH] =?UTF-8?q?=E2=AC=86=20train-calamari-gt4histocr:=20Update?=
 =?UTF-8?q?=20to=20Calamari=201?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data             |  2 +-
 requirements.txt |  4 ++--
 train.sh         | 20 ++++++++++++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/data b/data
index f817209..b0c0cd0 160000
--- a/data
+++ b/data
@@ -1 +1 @@
-Subproject commit f817209ba765464adb132a132774ea7856d53f4e
+Subproject commit b0c0cd08551ba1828ea73833034188047f3e14ab
diff --git a/requirements.txt b/requirements.txt
index 21daa14..5f95a5d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-calamari-ocr==0.3.5
-tensorflow-gpu==1.13.1
+calamari-ocr==1.0.1
+tensorflow-gpu==2.0.0
diff --git a/train.sh b/train.sh
index d8e1eb8..21c9b6b 100755
--- a/train.sh
+++ b/train.sh
@@ -35,6 +35,26 @@ done
 echo "Removing dta19/1882-keller_sinngedicht/04970.nrm.png (Broken PNG)"
 rm -f $TMPDIR/dta19/1882-keller_sinngedicht/04970.*
 
+
+# If we're just testing, keep only a random subset of the PNG files.
+if [ "$TEST" = 1 ]; then
+  num_pngs_wanted=2000
+  num_pngs=$(find "$TMPDIR" -path "$TMPDIR/*/*/*.png" | wc -l)
+  num_pngs_to_delete=$((num_pngs - num_pngs_wanted))
+  # Only prune when there is a surplus: "shuf -n" rejects a negative count,
+  # and without input "xargs rm" would still run rm with no operands.
+  if [ "$num_pngs_to_delete" -gt 0 ]; then
+    echo "TEST = 1, Reducing dataset from $num_pngs to $num_pngs_wanted PNG files"
+    # NUL-delimited so paths containing whitespace survive the pipeline.
+    find "$TMPDIR" -path "$TMPDIR/*/*/*.png" -print0 \
+      | shuf -z -n "$num_pngs_to_delete" \
+      | xargs -0 -r rm --
+  else
+    echo "TEST = 1, Keeping all $num_pngs PNG files (not more than $num_pngs_wanted)"
+  fi
+fi
+
+
 export PYTHONUNBUFFERED=1 # For python + tee
 
 outdir=$DATA_SUBDIR/calamari-models/GT4HistOCR