diff --git a/data b/data
index f817209..b0c0cd0 160000
--- a/data
+++ b/data
@@ -1 +1 @@
-Subproject commit f817209ba765464adb132a132774ea7856d53f4e
+Subproject commit b0c0cd08551ba1828ea73833034188047f3e14ab
diff --git a/requirements.txt b/requirements.txt
index 21daa14..5f95a5d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-calamari-ocr==0.3.5
-tensorflow-gpu==1.13.1
+calamari-ocr==1.0.1
+tensorflow-gpu==2.0.0
diff --git a/train.sh b/train.sh
index d8e1eb8..21c9b6b 100755
--- a/train.sh
+++ b/train.sh
@@ -35,6 +35,25 @@ done
 
 echo "Removing dta19/1882-keller_sinngedicht/04970.nrm.png (Broken PNG)"
 rm -f $TMPDIR/dta19/1882-keller_sinngedicht/04970.*
+
+# If we're just testing (TEST=1), keep only a random subset of the PNG files.
+# Nothing is deleted when the dataset already holds num_pngs_wanted or fewer.
+if [ "$TEST" = 1 ]; then
+    num_pngs_wanted=2000
+    num_pngs=$(find "$TMPDIR" -path "$TMPDIR/*/*/*.png" | wc -l)
+    num_pngs_to_delete=$((num_pngs - num_pngs_wanted))
+    if [ "$num_pngs_to_delete" -gt 0 ]; then
+        echo "TEST = 1, Reducing dataset from $num_pngs to $num_pngs_wanted PNG files"
+        # NUL-delimited so paths with whitespace survive; -r skips rm on empty input.
+        find "$TMPDIR" -path "$TMPDIR/*/*/*.png" -print0 \
+            | shuf -z -n "$num_pngs_to_delete" \
+            | xargs -0 -r rm --
+    else
+        echo "TEST = 1, dataset has only $num_pngs PNG files, keeping all of them"
+    fi
+fi
+
+
 export PYTHONUNBUFFERED=1 # For python + tee
 
 outdir=$DATA_SUBDIR/calamari-models/GT4HistOCR