mirror of
https://github.com/qurator-spk/eynollah.git
synced 2026-03-02 05:11:57 +01:00
docs/train: document --missing-printspace=project
This commit is contained in:
parent
4202a1b2db
commit
42bab0f935
2 changed files with 10 additions and 1 deletions
|
|
@ -271,6 +271,11 @@ eynollah-training generate-gt pagexml2label \
|
||||||
-doi "dir of output cropped images"
|
-doi "dir of output cropped images"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Also, note that it can be detrimental to layout training if there are visible segments which
|
||||||
|
the annotation does not account for (and thus the model must learn to ignore). So if the images
|
||||||
|
are not cropped, the `-ps` option _should_ be used. If a PAGE XML file is missing `PrintSpace` (or `Border`)
|
||||||
|
annotations, use `-mps` to either `skip` such files or `project` a print space (i.e. crop to the bounding box of the existing segments).
|
||||||
|
|
||||||
## Train a model
|
## Train a model
|
||||||
|
|
||||||
### classification
|
### classification
|
||||||
|
|
|
||||||
|
|
@ -724,7 +724,11 @@ def get_images_of_ground_truth(
|
||||||
print(gt_list[index], "has no Border or PrintSpace - projecting hull of segments")
|
print(gt_list[index], "has no Border or PrintSpace - projecting hull of segments")
|
||||||
bboxes = list(map(bbox_from_points, coords))
|
bboxes = list(map(bbox_from_points, coords))
|
||||||
left, top, right, bottom = zip(*bboxes)
|
left, top, right, bottom = zip(*bboxes)
|
||||||
ps_bbox = [min(left), min(top), max(right), max(bottom)]
|
left = max(0, min(left) - 5)
|
||||||
|
top = max(0, min(top) - 5)
|
||||||
|
right = min(x_len, max(right) + 5)
|
||||||
|
bottom = min(y_len, max(bottom) + 5)
|
||||||
|
ps_bbox = [left, top, right, bottom]
|
||||||
else:
|
else:
|
||||||
print(gt_list[index], "has no Border or PrintSpace - using full page")
|
print(gt_list[index], "has no Border or PrintSpace - using full page")
|
||||||
ps_bbox = [0, 0, None, None]
|
ps_bbox = [0, 0, None, None]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue