From eb64cc030f255e3da1d7bae6933a4f7cad2fff6c Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Thu, 5 Dec 2019 22:01:47 +0100
Subject: [PATCH 01/18] Create LICENSE

---
 LICENSE | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..261eeb9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

From b22a81297998ced8234120755899f099f52f402d Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Thu, 5 Dec 2019 22:06:44 +0100
Subject: [PATCH 02/18] Improve README.md

---
 README.md | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index d3da05f..b9317bb 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,17 @@
 # Textline-Recognition
 
 ***
-# Tool
-This tool does textline detection of image and throw result as xml data.
-
-# Models
-In order to run this tool you need corresponding models. You can find them here:
-
-https://file.spk-berlin.de:8443/textline_detection/
+# Introduction
+This tool performs textline detection from image data and returns the results as PAGE-XML.
 
 # Installation
 
-sudo pip install .
-
-# Usage
-
-sbb_textline_detector -i 'image file name' -o 'directory to write output xml' -m 'directory of models'
-
-
-
+`sudo pip install .`
 
+# Models
+In order to run this tool you also need trained models. You can download them here:   
+https://file.spk-berlin.de:8443/textline_detection/
 
+# Usage
 
+`sbb_textline_detector -i <image file name> -o <directory to write output xml> -m <directory of models>`

From 58f5d2b3c598153cc789e856d1c5ae92f1c62a11 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Thu, 5 Dec 2019 22:11:16 +0100
Subject: [PATCH 03/18] Update requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 42de57a..b6b8cfa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-opencv-python
+opencv-python-headless
 numpy
 matplotlib
 seaborn

From d90dad48fdf4dde7138441ad50d7fbbb44f964d1 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Thu, 5 Dec 2019 22:24:28 +0100
Subject: [PATCH 04/18] PAGE2019

---
 qurator/sbb_textline_detector/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index e70e475..00181a7 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -1236,9 +1236,9 @@ class textlineerkenner:
         # create the file structure
         data = ET.Element('PcGts')
 
-        data.set('xmlns',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15")
+        data.set('xmlns',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")
         data.set('xmlns:xsi',"http://www.w3.org/2001/XMLSchema-instance")
-        data.set('xsi:schemaLocation',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2017-07-15")
+        data.set('xsi:schemaLocation',"http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15")
 
 
 

From e696a068cb5f7ba8425f57ef1790dd19abc80b7e Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 00:20:34 +0100
Subject: [PATCH 05/18] Fix typos

---
 qurator/sbb_textline_detector/main.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index e70e475..249bf1b 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -70,7 +70,7 @@ class textlineerkenner:
                     np.array([point for point in polygon.exterior.coords], dtype=np.uint))
         return found_polygons_early
 
-    def filter_contours_area_of_image(self, image, contours, hirarchy, max_area, min_area):
+    def filter_contours_area_of_image(self, image, contours, hierarchy, max_area, min_area):
         found_polygons_early = list()
 
         jv = 0
@@ -81,13 +81,13 @@ class textlineerkenner:
             polygon = geometry.Polygon([point[0] for point in c])
             area = polygon.area
             if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(
-                    image.shape[:2]) and hirarchy[0][jv][3] == -1 :  # and hirarchy[0][jv][3]==-1 :
+                    image.shape[:2]) and hierarchy[0][jv][3] == -1 :  # and hierarchy[0][jv][3]==-1 :
                 found_polygons_early.append(
                     np.array([ [point] for point in polygon.exterior.coords], dtype=np.uint))
             jv += 1
         return found_polygons_early
 
-    def filter_contours_area_of_image_interiors(self, image, contours, hirarchy, max_area, min_area):
+    def filter_contours_area_of_image_interiors(self, image, contours, hierarchy, max_area, min_area):
         found_polygons_early = list()
 
         jv = 0
@@ -98,7 +98,7 @@ class textlineerkenner:
             polygon = geometry.Polygon([point[0] for point in c])
             area = polygon.area
             if area >= min_area * np.prod(image.shape[:2]) and area <= max_area * np.prod(image.shape[:2]) and \
-                    hirarchy[0][jv][3] != -1:
+                    hierarchy[0][jv][3] != -1:
                 # print(c[0][0][1])
                 found_polygons_early.append(
                     np.array([point for point in polygon.exterior.coords], dtype=np.uint))
@@ -486,9 +486,9 @@ class textlineerkenner:
 
         _, thresh = cv2.threshold(imgray, 0, 255, 0)
 
-        contours, hirarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+        contours, hierarchy = cv2.findContours(thresh.copy(), cv2.cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
         
-        main_contours = self.filter_contours_area_of_image(thresh, contours, hirarchy, max_area=1, min_area=0.00001)
+        main_contours = self.filter_contours_area_of_image(thresh, contours, hierarchy, max_area=1, min_area=0.00001)
         self.boxes = []
         
         for jj in range(len(main_contours)):

From 2ecb0218701051e68d661eaa55acbcd62ffe4764 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 00:35:11 +0100
Subject: [PATCH 06/18] refactor class name

---
 qurator/sbb_textline_detector/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index e70e475..245b70b 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -38,7 +38,7 @@ __doc__ = \
     """
 
 
-class textlineerkenner:
+class textline_detector:
     def __init__(self, image_dir, dir_out, f_name, dir_models):
         self.image_dir = image_dir  # XXX This does not seem to be a directory as the name suggests, but a file
         self.dir_out = dir_out
@@ -1475,7 +1475,7 @@ class textlineerkenner:
 def main(image, out, model):
     possibles = globals()  # XXX unused?
     possibles.update(locals())
-    x = textlineerkenner(image, out, None, model)
+    x = textline_detector(image, out, None, model)
     x.run()
 
 

From c8bc46862817493258ec24d39c8e5ccd098c6617 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 00:40:05 +0100
Subject: [PATCH 07/18] fix docstring

---
 qurator/sbb_textline_detector/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index e70e475..5b2473f 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -34,7 +34,7 @@ with warnings.catch_warnings():
 
 __doc__ = \
     """
-    tool to extract table form data from alto xml data
+    tool to extract text lines from document images
     """
 
 

From 02388a759dbe9f05b81189ecf46a27967ff9dc80 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 00:47:53 +0100
Subject: [PATCH 08/18] Update README.md

---
 README.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index b9317bb..162d471 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,16 @@
 # Textline-Recognition
 
-***
-# Introduction
-This tool performs textline detection from image data and returns the results as PAGE-XML.
+## Introduction
+This tool performs textline detection from document image data and returns the results as PAGE-XML.
 
-# Installation
+## Installation
 
 `sudo pip install .`
 
-# Models
-In order to run this tool you also need trained models. You can download them here:   
+## Models
+In order to run this tool you also need trained models. You can download our pre-trained models from here:   
 https://file.spk-berlin.de:8443/textline_detection/
 
-# Usage
+## Usage
 
 `sbb_textline_detector -i <image file name> -o <directory to write output xml> -m <directory of models>`

From 5113d28e13b7be21089b9f4c5553d4e7b757bc50 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 00:48:38 +0100
Subject: [PATCH 09/18] do not require sudo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 162d471..729ab12 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ This tool performs textline detection from document image data and returns the r
 
 ## Installation
 
-`sudo pip install .`
+`pip install .`
 
 ## Models
 In order to run this tool you also need trained models. You can download our pre-trained models from here:   

From 6c0bfba686c49da2e78a020e0b80c384413ab397 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 02:21:04 +0100
Subject: [PATCH 10/18] fix typos

---
 qurator/sbb_textline_detector/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index 249bf1b..3813bda 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -1378,7 +1378,7 @@ class textlineerkenner:
     
     def run(self):
         
-        #get image and sclaes, then extract the page of scanned image
+        #get image and scales, then extract the page of scanned image
         t1=time.time()
         self.get_image_and_scales()
         image_page,page_coord=self.extract_page()

From 3b526ef40d9c4d01a61a83cce44ae6011818df5d Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 02:27:23 +0100
Subject: [PATCH 11/18] refactor class name

---
 qurator/sbb_textline_detector/ocrd_cli.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qurator/sbb_textline_detector/ocrd_cli.py b/qurator/sbb_textline_detector/ocrd_cli.py
index d090e46..272d671 100644
--- a/qurator/sbb_textline_detector/ocrd_cli.py
+++ b/qurator/sbb_textline_detector/ocrd_cli.py
@@ -12,7 +12,7 @@ from ocrd_models.ocrd_page_generateds import MetadataItemType, LabelsType, Label
 from ocrd_utils import concat_padded, getLogger, MIMETYPE_PAGE
 from pkg_resources import resource_string
 
-from qurator.sbb_textline_detector import textlineerkenner
+from qurator.sbb_textline_detector import textline_detector
 
 log = getLogger('processor.OcrdSbbTextlineDetectorRecognize')
 
@@ -67,7 +67,7 @@ class OcrdSbbTextlineDetectorRecognize(Processor):
                 # Segment the image
                 image_file = self._resolve_image_file(input_file)
                 model = self.parameter['model']
-                x = textlineerkenner(image_file, tmp_dirname, file_id, model)
+                x = textline_detector(image_file, tmp_dirname, file_id, model)
                 x.run()
 
                 # Read segmentation results

From 3935204338fb977ba4429b7af93817c4006cc827 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Fri, 6 Dec 2019 11:42:23 +0100
Subject: [PATCH 12/18] =?UTF-8?q?=F0=9F=93=9D=20sbb=5Ftextline=5Fdetector:?=
 =?UTF-8?q?=20Document=20OCR-D=20Usage?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index d3da05f..a8905be 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,14 @@ sudo pip install .
 sbb_textline_detector -i 'image file name' -o 'directory to write output xml' -m 'directory of models'
 
 
-
-
-
-
+## Usage with OCR-D
+~~~
+ocrd-example-binarize -I OCR-D-IMG -O OCR-D-IMG-BIN
+ocrd_sbb_textline_detector -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE-SBB -p '{ "model": "/path/to/the/models/textline_detection" }'
+~~~
+
+Segmentation works on raw RGB images, but respects and retains
+`AlternativeImage`s from binarization steps, so it's a good idea to do
+binarization first, then perform the textline detection. The used binarization
+processor must produce an `AlternativeImage` for the binarized image, not
+replace the original raw RGB image.

From eb4c8ee99c68a948cbfdd05d6d4ce3280b21b7c0 Mon Sep 17 00:00:00 2001
From: "Gerber, Mike" <mike.gerber@sbb.spk-berlin.de>
Date: Fri, 6 Dec 2019 12:34:15 +0100
Subject: [PATCH 13/18] =?UTF-8?q?=F0=9F=93=9D=20sbb=5Ftextline=5Fdetector:?=
 =?UTF-8?q?=20Break=20long=20line=20for=20ocrd=5Fsbb=5Ftextline=5Fdetector?=
 =?UTF-8?q?=20example?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a8905be..e7b28fd 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,8 @@ sbb_textline_detector -i 'image file name' -o 'directory to write output xml' -m
 ## Usage with OCR-D
 ~~~
 ocrd-example-binarize -I OCR-D-IMG -O OCR-D-IMG-BIN
-ocrd_sbb_textline_detector -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE-SBB -p '{ "model": "/path/to/the/models/textline_detection" }'
+ocrd_sbb_textline_detector -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE-SBB \
+        -p '{ "model": "/path/to/the/models/textline_detection" }'
 ~~~
 
 Segmentation works on raw RGB images, but respects and retains

From b6ca1a7c5368e8611a90709f3e1813846f1fa0eb Mon Sep 17 00:00:00 2001
From: Konstantin Baierer <unixprog@gmail.com>
Date: Fri, 6 Dec 2019 18:21:00 +0100
Subject: [PATCH 14/18] kebab-case snake_case executable, fix #9

---
 .gitignore                                   | 2 ++
 qurator/sbb_textline_detector/ocrd-tool.json | 4 ++--
 setup.py                                     | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3fafd07
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+*.egg-info
diff --git a/qurator/sbb_textline_detector/ocrd-tool.json b/qurator/sbb_textline_detector/ocrd-tool.json
index b76f439..241f551 100644
--- a/qurator/sbb_textline_detector/ocrd-tool.json
+++ b/qurator/sbb_textline_detector/ocrd-tool.json
@@ -1,8 +1,8 @@
 {
   "version": "0.0.1",
   "tools": {
-    "ocrd_sbb_textline_detector": {
-      "executable": "ocrd_sbb_textline_detector",
+    "ocrd-sbb-textline-detector": {
+      "executable": "ocrd-sbb-textline-detector",
       "description": "Detect lines",
       "steps": ["layout/segmentation/line"],
       "input_file_grp": [
diff --git a/setup.py b/setup.py
index 1c9075f..92c88cf 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@ setup(
     entry_points={
       'console_scripts': [
         "sbb_textline_detector=qurator.sbb_textline_detector:main",
-        "ocrd_sbb_textline_detector=qurator.sbb_textline_detector:ocrd_sbb_textline_detector",
+        "ocrd-sbb-textline-detector=qurator.sbb_textline_detector:ocrd_sbb_textline_detector",
       ]
     },
     python_requires='>=3.6.0',

From 7c7f035b69bb4bf273e60bd0d3663bb2ea425384 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 19:02:50 +0100
Subject: [PATCH 15/18] matplotlib implies numpy

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index b6b8cfa..e915936 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 opencv-python-headless
-numpy
 matplotlib
 seaborn
 tqdm

From 9b784e3a81062231899c88d3bb1fd9a5b703871b Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 19:03:10 +0100
Subject: [PATCH 16/18] ocrd implies click

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index e915936..9240226 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,5 +7,4 @@ shapely
 scikit-learn
 tensorflow-gpu < 2.0
 scipy
-click
 ocrd >= 2.0.0

From be7b101f39dff67f20bbee97046eabc54b83d336 Mon Sep 17 00:00:00 2001
From: Clemens Neudecker <952378+cneud@users.noreply.github.com>
Date: Fri, 6 Dec 2019 20:06:43 +0100
Subject: [PATCH 17/18] Update README.md

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index bf1fd89..46a8295 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Textline-Recognition
+# Textline Detection
 
 ## Introduction
 This tool performs textline detection from document image data and returns the results as PAGE-XML.
@@ -19,7 +19,7 @@ https://file.spk-berlin.de:8443/textline_detection/
 
 ~~~
 ocrd-example-binarize -I OCR-D-IMG -O OCR-D-IMG-BIN
-ocrd_sbb_textline_detector -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE-SBB \
+ocrd-sbb-textline-detector -I OCR-D-IMG-BIN -O OCR-D-SEG-LINE-SBB \
         -p '{ "model": "/path/to/the/models/textline_detection" }'
 ~~~
 
@@ -27,4 +27,4 @@ Segmentation works on raw RGB images, but respects and retains
 `AlternativeImage`s from binarization steps, so it's a good idea to do
 binarization first, then perform the textline detection. The used binarization
 processor must produce an `AlternativeImage` for the binarized image, not
-replace the original raw RGB image.
\ No newline at end of file
+replace the original raw RGB image.

From ea3e7737acaa35e8adbffcac35deb39691eb7ded Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Fri, 6 Dec 2019 23:42:51 +0100
Subject: [PATCH 18/18] Fix more typos (found by codespell)

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 qurator/sbb_textline_detector/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qurator/sbb_textline_detector/main.py b/qurator/sbb_textline_detector/main.py
index 18aeaa5..6f6c516 100644
--- a/qurator/sbb_textline_detector/main.py
+++ b/qurator/sbb_textline_detector/main.py
@@ -916,8 +916,8 @@ class textlineerkenner:
         image_box_tabels=image_box_tabels.astype(np.uint8)
         imgray = cv2.cvtColor(image_box_tabels, cv2.COLOR_BGR2GRAY)
         ret, thresh = cv2.threshold(imgray, 0, 255, 0)
-        contours,hierachy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
-        return contours,hierachy
+        contours,hierarchy=cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
+        return contours,hierarchy
     
     def find_contours_mean_y_diff(self,contours_main):
         M_main=[cv2.moments(contours_main[j]) for j in range(len(contours_main))]